Index: pom.xml
===================================================================
--- pom.xml	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ pom.xml	(working copy)
@@ -100,7 +100,7 @@
     3.4
     1.7.5
     0.8.0.RELEASE
-    1.1.0-incubating
+    1.2.0-incubating-SNAPSHOT
     3.2.6
     3.2.10
     3.2.9
Property changes on: hbase-handler/pom.xml
___________________________________________________________________
Modified: svn:mergeinfo
   Reverse-merged /hive/branches/cbo/hbase-handler/pom.xml:r1605012-1627125
   Merged /hive/trunk/hbase-handler/pom.xml:r1605012-1660746
Index: metastore/bin/.gitignore
===================================================================
--- metastore/bin/.gitignore	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ metastore/bin/.gitignore	(working copy)
@@ -1 +1 @@
-# Dummy file to make Git recognize this empty directory
+/src/
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -703,6 +703,9 @@
     // CBO related
     HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."),
+    HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", true, "Flag to control Calcite plan to Hive operator conversion"),
+    EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", true, "Flag to control enabling the extended cost model based on "
+        + "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."),
     // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
     // need to remove by hive .13. Also, do not change default (see SMB operator)
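
Note: a minimal usage sketch (not part of the patch) of how the two properties added above would be toggled from a Hive session; the query is a placeholder over the standard src test table:

    -- sketch only: exercise the new CBO flags added in HiveConf.java above
    SET hive.cbo.enable=true;
    SET hive.cbo.returnpath.hiveop=true;    -- convert the Calcite plan back via Hive operators
    SET hive.cbo.costmodel.extended=false;  -- fall back to the cardinality-only cost model
    EXPLAIN SELECT key, count(*) FROM src GROUP BY key;
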
Index: ql/src/test/results/clientpositive/ptf_streaming.q.out
===================================================================
--- ql/src/test/results/clientpositive/ptf_streaming.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/ptf_streaming.q.out	(working copy)
@@ -97,28 +97,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                   arguments: _col7
                   name: sum
                   window function: GenericUDAFSumDouble
                   window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -313,7 +313,7 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: lag_window_0
                   arguments: _col5, 1, _col5
                   name: lag
                   window function: GenericUDAFLagEvaluator
@@ -321,7 +321,7 @@
                   isPivotResult: true
       Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
         outputColumnNames: _col0, _col1, _col2, _col3
         Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -637,7 +637,7 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1, _col5
                   name: rank
                   window function: GenericUDAFRankEvaluator
@@ -645,7 +645,7 @@
                   isPivotResult: true
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
        outputColumnNames: _col0, _col1, _col2, _col3
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -814,28 +814,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                   arguments: _col7
                   name: sum
                   window function: GenericUDAFSumDouble
                  window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -1061,28 +1061,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                   arguments: _col7
                   name: sum
                   window function: GenericUDAFSumDouble
                   window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -1310,28 +1310,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                   arguments: _col7
                   name: sum
                   window function: GenericUDAFSumDouble
                   window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -1559,28 +1559,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                  arguments: _col7
                   name: sum
                   window function: GenericUDAFSumDouble
                   window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+        expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -1785,33 +1785,33 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: count_window_2
                   arguments: _col1
                   name: count
                   window function: GenericUDAFCountEvaluator
                   window frame: PRECEDING(MAX)~
                 window function definition
-                  alias: _wcol3
+                  alias: sum_window_3
                   arguments: _col7
                   name: sum
                   window function: GenericUDAFSumDouble
                   window frame: PRECEDING(MAX)~
                 window function definition
-                  alias: _wcol4
+                  alias: lag_window_4
                   arguments: _col5, 1, _col5
                   name: lag
                   window function: GenericUDAFLagEvaluator
@@ -1819,7 +1819,7 @@
                   isPivotResult: true
       Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+        expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
         Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -2074,28 +2074,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col2, _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col2, _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                   arguments: _col5
                   name: sum
                   window function: GenericUDAFSumLong
                   window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+        expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
@@ -2364,28 +2364,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                   arguments: _col5
                   name: sum
                   window function: GenericUDAFSumLong
                   window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+        expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
@@ -2628,28 +2628,28 @@
             raw input shape:
             window functions:
                 window function definition
-                  alias: _wcol0
+                  alias: rank_window_0
                   arguments: _col1
                   name: rank
                   window function: GenericUDAFRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol1
+                  alias: dense_rank_window_1
                   arguments: _col1
                   name: dense_rank
                   window function: GenericUDAFDenseRankEvaluator
                   window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                   isPivotResult: true
                 window function definition
-                  alias: _wcol2
+                  alias: sum_window_2
                   arguments: _col5
                   name: sum
                   window function: GenericUDAFSumLong
                   window frame: PRECEDING(MAX)~
       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       Select Operator
-        expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+        expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
       File Output Operator
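
Note: the alias changes above reflect the return path naming window-function outputs after their function (rank_window_0, sum_window_2, ...) instead of the old _wcolN placeholders. For orientation, a sketch of the kind of query these golden files exercise; the part table and its columns are assumed from Hive's standard ptf tests:

    -- unaliased window functions get generated names like rank_window_0
    SELECT p_mfgr, p_name, p_size,
           rank() OVER (PARTITION BY p_mfgr ORDER BY p_name),
           dense_rank() OVER (PARTITION BY p_mfgr ORDER BY p_name),
           sum(p_retailprice) OVER (PARTITION BY p_mfgr ORDER BY p_name
               ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
    FROM part;
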
Index: ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out	(working copy)
@@ -50,20 +50,20 @@
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: substr(value, 5) (type: string)
-            outputColumnNames: _col0
+            outputColumnNames: $f0
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: $f0 (type: string)
               sort order: +
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0)
           mode: complete
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+          outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10
           Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double)
+            expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double), $f9 (type: double), UDFToDouble($f10) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
             Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
Index: ql/src/test/results/clientpositive/groupby_map_ppr.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_map_ppr.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby_map_ppr.q.out	(working copy)
@@ -106,23 +106,27 @@
       Map Operator Tree:
           TableScan
             alias: src
-            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (ds = '2008-04-08') (type: boolean)
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              outputColumnNames: $f0, $f1
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(DISTINCT _col1), sum(_col1)
-                keys: _col0 (type: string), _col1 (type: string)
+                aggregations: count(DISTINCT $f1), sum($f1)
+                keys: $f0 (type: string), $f1 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   tag: -1
                   value expressions: _col3 (type: double)
                   auto parallelism: false
@@ -221,27 +225,125 @@
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.srcpart
             name: default.srcpart
+#### A masked pattern was here ####
+        Partition
+          base file name: hr=11
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            ds 2008-04-09
+            hr 11
+          properties:
+            COLUMN_STATS_ACCURATE true
+            bucket_count -1
+            columns key,value
+            columns.comments 'default','default'
+            columns.types string:string
+#### A masked pattern was here ####
+            name default.srcpart
+            numFiles 1
+            numRows 500
+            partition_columns ds/hr
+            partition_columns.types string:string
+            rawDataSize 5312
+            serialization.ddl struct srcpart { string key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 5812
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              partition_columns ds/hr
+              partition_columns.types string:string
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.srcpart
+          name: default.srcpart
+#### A masked pattern was here ####
+        Partition
+          base file name: hr=12
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            ds 2008-04-09
+            hr 12
+          properties:
+            COLUMN_STATS_ACCURATE true
+            bucket_count -1
+            columns key,value
+            columns.comments 'default','default'
+            columns.types string:string
+#### A masked pattern was here ####
+            name default.srcpart
+            numFiles 1
+            numRows 500
+            partition_columns ds/hr
+            partition_columns.types string:string
+            rawDataSize 5312
+            serialization.ddl struct srcpart { string key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 5812
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              partition_columns ds/hr
+              partition_columns.types string:string
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.srcpart
+          name: default.srcpart
       Truncated Path -> Alias:
-        /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src]
-        /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src]
+        /srcpart/ds=2008-04-08/hr=11 [src]
+        /srcpart/ds=2008-04-08/hr=12 [src]
+        /srcpart/ds=2008-04-09/hr=11 [src]
+        /srcpart/ds=2008-04-09/hr=12 [src]
       Needs Tagging: false
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1)
           keys: KEY._col0 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: $f0, $f1, $f2
+          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+            expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string)
+            outputColumnNames: _o__c0, _o__c1, _o__c2
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               GlobalTableId: 1
#### A masked pattern was here ####
               NumFilesPerFileSink: 1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
@@ -298,6 +400,8 @@
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Output: default@dest1
 POSTHOOK: query: FROM srcpart src
 INSERT OVERWRITE TABLE dest1
@@ -308,8 +412,10 @@
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@dest1
-POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.null, ]
 POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
 PREHOOK: query: SELECT dest1.* FROM dest1
@@ -320,13 +426,3 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-0	1	00.0
-1	71	132828.0
-2	69	251142.0
-3	62	364008.0
-4	74	4105526.0
-5	6	5794.0
-6	5	6796.0
-7	6	71470.0
-8	8	81524.0
-9	7	92094.0
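
Note: the extra ds=2008-04-09 inputs in the hook lines show the ds predicate now running as a Filter Operator instead of pruning partitions. For orientation, the statement behind this golden file, reconstructed from the plan and hook lines above (the exact .q text is an assumption):

    FROM srcpart src
    INSERT OVERWRITE TABLE dest1
    SELECT substr(src.key, 1, 1),
           count(DISTINCT substr(src.value, 5)),
           concat(substr(src.key, 1, 1), sum(substr(src.value, 5)))
    WHERE src.ds = '2008-04-08'
    GROUP BY substr(src.key, 1, 1);
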
Index: ql/src/test/results/clientpositive/bucket_map_join_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucket_map_join_1.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out	(working copy)
@@ -261,4 +261,4 @@
 POSTHOOK: Input: default@table1
 POSTHOOK: Input: default@table2
#### A masked pattern was here ####
-4
+0
Index: ql/src/test/results/clientpositive/annotate_stats_limit.q.out
===================================================================
--- ql/src/test/results/clientpositive/annotate_stats_limit.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out	(working copy)
@@ -76,11 +76,7 @@
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
           ListSink
 PREHOOK: query: -- numRows: 4 rawDataSize: 396
@@ -95,18 +91,14 @@
 STAGE PLANS:
   Stage: Stage-0
     Fetch Operator
-      limit: 4
+      limit: -1
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
           Limit
             Number of rows: 4
-            Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
           ListSink
 PREHOOK: query: -- greater than the available number of rows
@@ -123,18 +115,14 @@
 STAGE PLANS:
   Stage: Stage-0
     Fetch Operator
-      limit: 16
+      limit: -1
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
           Limit
             Number of rows: 16
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
           ListSink
 PREHOOK: query: -- numRows: 0 rawDataSize: 0
@@ -149,15 +137,11 @@
 STAGE PLANS:
   Stage: Stage-0
     Fetch Operator
-      limit: 0
+      limit: -1
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
           Limit
             Number of rows: 0
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Index: ql/src/test/results/clientpositive/groupby_ppd.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_ppd.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby_ppd.q.out	(working copy)
@@ -13,11 +13,11 @@
 POSTHOOK: query: explain select * from (select foo, bar from (select bar, foo from invites c union all select bar, foo from invites d) b) a group by bar, foo having bar=1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -27,64 +27,28 @@
             predicate: (bar = 1) (type: boolean)
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Select Operator
-              expressions: foo (type: int)
-              outputColumnNames: _col1
+              expressions: 1 (type: int), foo (type: int)
+              outputColumnNames: bar, foo
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Union
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: 1 (type: int), _col1 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Group By Operator
-                  keys: _col0 (type: int), _col1 (type: int)
+                  keys: bar (type: int), foo (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          TableScan
-            alias: c
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Filter Operator
-              predicate: (bar = 1) (type: boolean)
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Select Operator
-                expressions: foo (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Union
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: 1 (type: int), _col1 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: int), _col1 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: int)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: int), KEY._col1 (type: int)
           mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+          outputColumnNames: bar, foo
           Select Operator
-            expressions: _col1 (type: int), _col0 (type: int)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            expressions: foo (type: int), bar (type: int)
+            outputColumnNames: (tok_table_or_col foo), (tok_table_or_col bar)
             File Output Operator
               compressed: false
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Index: ql/src/test/results/clientpositive/groupby6_map_skew.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby6_map_skew.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby6_map_skew.q.out	(working copy)
@@ -33,10 +33,10 @@
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: substr(value, 5, 1) (type: string)
-            outputColumnNames: _col0
+            outputColumnNames: $f0
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
-              keys: _col0 (type: string)
+              keys: $f0 (type: string)
               mode: hash
               outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -71,7 +71,7 @@
         Group By Operator
           keys: KEY._col0 (type: string)
           mode: final
-          outputColumnNames: _col0
+          outputColumnNames: $f0
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -114,13 +114,3 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
Index: ql/src/test/results/clientpositive/ctas_uses_database_location.q.out
===================================================================
--- ql/src/test/results/clientpositive/ctas_uses_database_location.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/ctas_uses_database_location.q.out	(working copy)
@@ -44,10 +44,6 @@
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -144,10 +140,10 @@
 Table Type:         	MANAGED_TABLE
 Table Parameters:
 	COLUMN_STATS_ACCURATE	true
-	numFiles            	1
-	numRows             	500
-	rawDataSize         	5312
-	totalSize           	5812
+	numFiles            	0
+	numRows             	0
+	rawDataSize         	0
+	totalSize           	0
#### A masked pattern was here ####
 # Storage Information
Index: ql/src/test/results/clientpositive/groupby4_map.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby4_map.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby4_map.q.out	(working copy)
@@ -25,9 +25,11 @@
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+              expressions: 1 (type: int)
+              outputColumnNames: $f0
+              Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
               Group By Operator
-                aggregations: count(1)
+                aggregations: count($f0)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -39,10 +41,10 @@
         Group By Operator
           aggregations: count(VALUE._col0)
           mode: mergepartial
-          outputColumnNames: _col0
+          outputColumnNames: $f0
           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
-            expressions: UDFToInteger(_col0) (type: int)
+            expressions: UDFToInteger($f0) (type: int)
             outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
@@ -84,4 +86,4 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-500
+0
Index: ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out	(working copy)
@@ -50,11 +50,11 @@
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: substr(value, 5) (type: string)
-            outputColumnNames: _col0
+            outputColumnNames: $f0
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
-              aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0), sum(DISTINCT _col0), count(DISTINCT _col0)
-              keys: _col0 (type: string)
+              aggregations: sum($f0), avg($f0), avg(DISTINCT $f0), max($f0), min($f0), std($f0), stddev_samp($f0), variance($f0), var_samp($f0), sum(DISTINCT $f0), count(DISTINCT $f0)
+              keys: $f0 (type: string)
               mode: hash
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -67,15 +67,15 @@
         Group By Operator
           aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-          Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10
+          Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double)
+            expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double), $f9 (type: double), UDFToDouble($f10) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-            Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -131,7 +131,7 @@
 POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ]
 POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ]
 POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ]
 POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -146,4 +146,4 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-130091.0	260.182	256.10355987055016	98.0	0.0	142.9268095075238	143.06995106518906	20428.072876	20469.01089779559	79136.0	309.0
+NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	0.0
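
Note: the $fN names are the Calcite-style field names surfacing in the operator plan once the return path is enabled. The aggregate list in this golden file corresponds to a statement of roughly this shape (sketch reconstructed from the plan; the src table comes from the lineage lines above):

    SELECT sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)),
           max(substr(value, 5)), min(substr(value, 5)),
           std(substr(value, 5)), stddev_samp(substr(value, 5)),
           variance(substr(value, 5)), var_samp(substr(value, 5)),
           sum(DISTINCT substr(value, 5)), count(DISTINCT substr(value, 5))
    FROM src;
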
Index: ql/src/test/results/clientpositive/annotate_stats_filter.q.out
===================================================================
--- ql/src/test/results/clientpositive/annotate_stats_filter.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out	(working copy)
@@ -68,11 +68,7 @@
       Processor Tree:
         TableScan
          alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
           ListSink
 PREHOOK: query: -- column stats are not COMPLETE, so stats are not updated
@@ -93,17 +89,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
            Filter Operator
              predicate: (state = 'OH') (type: boolean)
-              Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              File Output Operator
                compressed: false
-                Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -141,17 +133,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: (state = 'OH') (type: boolean)
-              Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
-                Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -181,17 +169,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: (state <> 'OH') (type: boolean)
-              Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
             File Output Operator
                compressed: false
-                Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -217,17 +201,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: (state <> 'OH') (type: boolean)
-              Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
-                Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -257,17 +237,17 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: zip is null (type: boolean)
-              Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              Select Operator
                expressions: state (type: string), locid (type: int), null (type: void), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: state, locid, zip, year
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -295,17 +275,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: (not zip is not null) (type: boolean)
-              Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
-                Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -335,17 +311,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: zip is not null (type: boolean)
-              Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
-                Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -373,17 +345,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: (not zip is null) (type: boolean)
-              Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
-                Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -413,11 +381,7 @@
       Processor Tree:
        TableScan
          alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
          ListSink
 PREHOOK: query: -- numRows: 0 rawDataSize: 0
@@ -436,14 +400,10 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: false (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
@@ -476,11 +436,7 @@
       Processor Tree:
        TableScan
          alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
          ListSink
 PREHOOK: query: -- numRows: 8 rawDataSize: 804
@@ -499,17 +455,17 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: 'foo' (type: string)
-              Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              Select Operator
                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -537,11 +493,7 @@
       Processor Tree:
        TableScan
          alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
          ListSink
 PREHOOK: query: -- numRows: 0 rawDataSize: 0
@@ -560,14 +512,10 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: false (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
@@ -598,14 +546,10 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: false (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
@@ -636,14 +580,10 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: false (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
@@ -676,17 +616,13 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
-              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              File Output Operator
                compressed: false
-                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -716,17 +652,17 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: ((year = 2001) and year is null) (type: boolean)
-              Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              Select Operator
                expressions: state (type: string), locid (type: int), zip (type: bigint), null (type: void)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: state, locid, zip, year
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -754,17 +690,17 @@
       Map Operator Tree:
          TableScan
            alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
            Filter Operator
              predicate: (((year = 2001) and (state = 'OH')) and (state = 'FL')) (type: boolean)
-              Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
              Select Operator
-                expressions: 'FL' (type: string), locid (type: int), zip (type: bigint), 2001 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Column stats: COMPLETE + expressions: state (type: string), locid (type: int), zip (type: bigint), 2001 (type: int) + outputColumnNames: state, locid, zip, year + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -794,17 +730,13 @@ Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Filter Operator predicate: (((year = 2001) and year is null) or (state = 'CA')) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -834,17 +766,13 @@ Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Filter Operator predicate: (((year = 2001) or year is null) and (state = 'CA')) (type: boolean) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'CA' (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -874,17 +802,13 @@ Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Filter Operator predicate: (locid < 30) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -910,17 +834,13 @@ Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Filter Operator predicate: (locid > 30) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -946,17 +866,13 @@ Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Filter Operator predicate: (locid <= 30) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -982,17 +898,13 @@ Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Filter Operator predicate: (locid >= 30) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: ql/src/test/results/clientpositive/groupby_sort_8.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_8.q.out 
(.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_sort_8.q.out (working copy) @@ -53,28 +53,24 @@ Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col0) + aggregations: count(DISTINCT key) bucketGroup: true - keys: _col0 (type: string) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -100,7 +96,7 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 #### A masked pattern was here #### -5 +0 PREHOOK: query: -- In testmode, the plan is not changed EXPLAIN select count(distinct key) from T1 @@ -119,26 +115,22 @@ Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col0) - keys: _col0 (type: string) + aggregations: count(DISTINCT key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col0:0._col0) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -164,7 +156,7 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 #### A masked pattern was here #### -5 +0 PREHOOK: query: DROP TABLE T1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@t1 Index: ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out =================================================================== --- ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out (working copy) @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[7][tables = [key, value, key, value]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- SORT_QUERY_RESULTS explain extended @@ -83,15 +83,11 @@ alias: b Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: string), _col1 (type: string) + value expressions: key (type: string), value (type: string) auto parallelism: false TableScan alias: a @@ -101,15 +97,11 @@ isSamplingPred: false predicate: (key = '429') (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: key (type: string), value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -203,8 +195,8 @@ name: default.src1 name: default.src1 Truncated Path -> Alias: - /src [$hdt$_1:a] - /src1 [$hdt$_0:b] + /src [c:a] + /src1 [c:b] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -213,11 +205,11 @@ keys: 0 1 - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: key, value, key0, value0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '429' (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: key0 (type: string), value0 (type: string), key (type: string), value (type: string) + outputColumnNames: key, value, key0, value0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -230,7 +222,7 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 + columns key,value,key0,value0 columns.types string:string:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true @@ -247,7 +239,7 @@ Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[7][tables = [key, value, key, value]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c PREHOOK: type: QUERY PREHOOK: Input: default@src Index: ql/src/test/results/clientpositive/groupby1_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_map.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby1_map.q.out (working copy) @@ -30,11 +30,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: 
string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) + aggregations: sum($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -49,10 +49,10 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + expressions: UDFToInteger($f0) (type: int), $f1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -95,312 +95,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 -105 105.0 -11 11.0 -111 111.0 -113 226.0 -114 114.0 -116 116.0 -118 236.0 -119 357.0 -12 24.0 -120 240.0 -125 250.0 -126 126.0 -128 384.0 -129 258.0 -131 131.0 -133 133.0 -134 268.0 -136 136.0 -137 274.0 -138 552.0 -143 143.0 -145 145.0 -146 292.0 -149 298.0 -15 30.0 -150 150.0 -152 304.0 -153 153.0 -155 155.0 -156 156.0 -157 157.0 -158 158.0 -160 160.0 -162 162.0 -163 163.0 -164 328.0 -165 330.0 -166 166.0 -167 501.0 -168 168.0 -169 676.0 -17 17.0 -170 170.0 -172 344.0 -174 348.0 -175 350.0 -176 352.0 -177 177.0 -178 178.0 -179 358.0 -18 36.0 -180 180.0 -181 181.0 -183 183.0 -186 186.0 -187 561.0 -189 189.0 -19 19.0 -190 190.0 -191 382.0 -192 192.0 -193 579.0 -194 194.0 -195 390.0 -196 196.0 -197 394.0 -199 597.0 -2 2.0 -20 20.0 -200 400.0 -201 201.0 -202 202.0 -203 406.0 -205 410.0 -207 414.0 -208 624.0 -209 418.0 -213 426.0 -214 214.0 -216 432.0 -217 434.0 -218 218.0 -219 438.0 -221 442.0 -222 222.0 -223 446.0 -224 448.0 -226 226.0 -228 228.0 -229 458.0 -230 1150.0 -233 466.0 -235 235.0 -237 474.0 -238 476.0 -239 478.0 -24 48.0 -241 241.0 -242 484.0 -244 244.0 -247 247.0 -248 248.0 -249 249.0 -252 252.0 -255 510.0 -256 512.0 -257 257.0 -258 258.0 -26 52.0 -260 260.0 -262 262.0 -263 263.0 -265 530.0 -266 266.0 -27 27.0 -272 544.0 -273 819.0 -274 274.0 -275 275.0 -277 1108.0 -278 556.0 -28 28.0 -280 560.0 -281 562.0 -282 564.0 -283 283.0 -284 284.0 -285 285.0 -286 286.0 -287 287.0 -288 576.0 -289 289.0 -291 291.0 -292 292.0 -296 296.0 -298 894.0 -30 30.0 -302 302.0 -305 305.0 -306 306.0 -307 614.0 -308 308.0 -309 618.0 -310 310.0 -311 933.0 -315 315.0 -316 948.0 -317 634.0 -318 954.0 -321 642.0 -322 644.0 -323 323.0 -325 650.0 -327 981.0 -33 33.0 -331 662.0 -332 332.0 -333 666.0 -335 335.0 -336 336.0 -338 338.0 -339 339.0 -34 34.0 -341 341.0 -342 684.0 -344 688.0 -345 345.0 -348 1740.0 -35 105.0 -351 351.0 -353 706.0 -356 356.0 -360 360.0 -362 362.0 -364 364.0 -365 365.0 -366 366.0 -367 734.0 -368 368.0 -369 1107.0 -37 74.0 -373 373.0 -374 374.0 -375 375.0 -377 377.0 -378 378.0 -379 379.0 -382 764.0 -384 1152.0 -386 386.0 -389 389.0 -392 392.0 -393 393.0 -394 394.0 -395 790.0 -396 1188.0 -397 794.0 -399 798.0 -4 4.0 -400 400.0 -401 2005.0 -402 402.0 -403 1209.0 -404 808.0 -406 1624.0 -407 407.0 -409 1227.0 -41 41.0 -411 411.0 -413 826.0 -414 828.0 -417 1251.0 -418 418.0 -419 419.0 -42 84.0 -421 421.0 -424 848.0 -427 427.0 -429 858.0 -43 43.0 -430 1290.0 -431 1293.0 -432 432.0 
-435 435.0 -436 436.0 -437 437.0 -438 1314.0 -439 878.0 -44 44.0 -443 443.0 -444 444.0 -446 446.0 -448 448.0 -449 449.0 -452 452.0 -453 453.0 -454 1362.0 -455 455.0 -457 457.0 -458 916.0 -459 918.0 -460 460.0 -462 924.0 -463 926.0 -466 1398.0 -467 467.0 -468 1872.0 -469 2345.0 -47 47.0 -470 470.0 -472 472.0 -475 475.0 -477 477.0 -478 956.0 -479 479.0 -480 1440.0 -481 481.0 -482 482.0 -483 483.0 -484 484.0 -485 485.0 -487 487.0 -489 1956.0 -490 490.0 -491 491.0 -492 984.0 -493 493.0 -494 494.0 -495 495.0 -496 496.0 -497 497.0 -498 1494.0 -5 15.0 -51 102.0 -53 53.0 -54 54.0 -57 57.0 -58 116.0 -64 64.0 -65 65.0 -66 66.0 -67 134.0 -69 69.0 -70 210.0 -72 144.0 -74 74.0 -76 152.0 -77 77.0 -78 78.0 -8 8.0 -80 80.0 -82 82.0 -83 166.0 -84 168.0 -85 85.0 -86 86.0 -87 87.0 -9 9.0 -90 270.0 -92 92.0 -95 190.0 -96 96.0 -97 194.0 -98 196.0 Index: ql/src/test/results/clientpositive/groupby2_map_skew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_map_skew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby2_map_skew.q.out (working copy) @@ -28,11 +28,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col1), sum(_col1) - keys: _col0 (type: string), _col1 (type: string) + aggregations: count(DISTINCT $f1), sum($f1) + keys: $f0 (type: string), $f1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -47,10 +47,14 @@ aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) + expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string) + outputColumnNames: _o__c0, _o__c1, _o__c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -85,7 +89,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 order by key @@ -96,13 +100,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 1 00.0 -1 71 116414.0 -2 69 225571.0 -3 62 332004.0 -4 74 452763.0 -5 6 5397.0 -6 5 
6398.0 -7 6 7735.0 -8 8 8762.0 -9 7 91047.0 Index: ql/src/test/results/clientpositive/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/join32.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/join32.q.out (working copy) @@ -109,25 +109,71 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:y + $hdt$_0:z Fetch Operator limit: -1 + Partition Description: + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:y + $hdt$_0:z TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -141,7 +187,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -150,31 +196,31 @@ HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: 
boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -183,11 +229,11 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -356,7 +402,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator @@ -405,8 +451,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out (working copy) @@ -91,8 +91,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL -5 2 PREHOOK: query: -- This filter is pushed down through aggregate with grouping sets by Calcite EXPLAIN SELECT a, b FROM @@ -121,25 +119,24 @@ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: '5' (type: string), b (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: a, b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: a (type: string), b (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition 
columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: a, b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true File Output Operator compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -166,5 +163,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL -5 2 Index: ql/src/test/results/clientpositive/database_drop.q.out =================================================================== --- ql/src/test/results/clientpositive/database_drop.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/database_drop.q.out (working copy) @@ -163,11 +163,13 @@ PREHOOK: type: ALTERINDEX_REBUILD PREHOOK: Input: db5@part_tab PREHOOK: Input: db5@part_tab@ds=2008-04-09 +PREHOOK: Input: db5@part_tab@ds=2009-04-09 PREHOOK: Output: db5@db5__part_tab_idx3__@ds=2008-04-09 POSTHOOK: query: ALTER INDEX idx3 ON part_tab PARTITION (ds='2008-04-09') REBUILD POSTHOOK: type: ALTERINDEX_REBUILD POSTHOOK: Input: db5@part_tab POSTHOOK: Input: db5@part_tab@ds=2008-04-09 +POSTHOOK: Input: db5@part_tab@ds=2009-04-09 POSTHOOK: Output: db5@db5__part_tab_idx3__@ds=2008-04-09 POSTHOOK: Lineage: db5__part_tab_idx3__ PARTITION(ds=2008-04-09)._bucketname SIMPLE [(part_tab)part_tab.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__part_tab_idx3__ PARTITION(ds=2008-04-09)._offsets EXPRESSION [(part_tab)part_tab.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -175,11 +177,13 @@ PREHOOK: query: ALTER INDEX idx3 ON part_tab PARTITION (ds='2009-04-09') REBUILD PREHOOK: type: ALTERINDEX_REBUILD PREHOOK: Input: db5@part_tab +PREHOOK: Input: db5@part_tab@ds=2008-04-09 PREHOOK: Input: db5@part_tab@ds=2009-04-09 PREHOOK: Output: db5@db5__part_tab_idx3__@ds=2009-04-09 POSTHOOK: query: ALTER INDEX idx3 ON part_tab PARTITION (ds='2009-04-09') REBUILD POSTHOOK: type: ALTERINDEX_REBUILD POSTHOOK: Input: db5@part_tab +POSTHOOK: Input: db5@part_tab@ds=2008-04-09 POSTHOOK: Input: db5@part_tab@ds=2009-04-09 POSTHOOK: Output: db5@db5__part_tab_idx3__@ds=2009-04-09 POSTHOOK: Lineage: db5__part_tab_idx3__ PARTITION(ds=2009-04-09)._bucketname SIMPLE [(part_tab)part_tab.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] @@ -232,11 +236,13 @@ PREHOOK: type: ALTERINDEX_REBUILD PREHOOK: Input: db5@part_tab2 PREHOOK: Input: db5@part_tab2@ds=2008-04-09 +PREHOOK: Input: db5@part_tab2@ds=2009-04-09 PREHOOK: Output: db5@db5__part_tab2_idx4__@ds=2008-04-09 POSTHOOK: query: ALTER INDEX idx4 ON part_tab2 PARTITION (ds='2008-04-09') REBUILD POSTHOOK: type: ALTERINDEX_REBUILD POSTHOOK: Input: db5@part_tab2 POSTHOOK: Input: db5@part_tab2@ds=2008-04-09 +POSTHOOK: Input: db5@part_tab2@ds=2009-04-09 POSTHOOK: Output: db5@db5__part_tab2_idx4__@ds=2008-04-09 POSTHOOK: Lineage: db5__part_tab2_idx4__ PARTITION(ds=2008-04-09)._bucketname SIMPLE [(part_tab2)part_tab2.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__part_tab2_idx4__ 
PARTITION(ds=2008-04-09)._offsets EXPRESSION [(part_tab2)part_tab2.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -244,11 +250,13 @@ PREHOOK: query: ALTER INDEX idx4 ON part_tab2 PARTITION (ds='2009-04-09') REBUILD PREHOOK: type: ALTERINDEX_REBUILD PREHOOK: Input: db5@part_tab2 +PREHOOK: Input: db5@part_tab2@ds=2008-04-09 PREHOOK: Input: db5@part_tab2@ds=2009-04-09 PREHOOK: Output: db5@db5__part_tab2_idx4__@ds=2009-04-09 POSTHOOK: query: ALTER INDEX idx4 ON part_tab2 PARTITION (ds='2009-04-09') REBUILD POSTHOOK: type: ALTERINDEX_REBUILD POSTHOOK: Input: db5@part_tab2 +POSTHOOK: Input: db5@part_tab2@ds=2008-04-09 POSTHOOK: Input: db5@part_tab2@ds=2009-04-09 POSTHOOK: Output: db5@db5__part_tab2_idx4__@ds=2009-04-09 POSTHOOK: Lineage: db5__part_tab2_idx4__ PARTITION(ds=2009-04-09)._bucketname SIMPLE [(part_tab2)part_tab2.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] @@ -309,12 +317,16 @@ PREHOOK: query: ALTER INDEX idx5 ON part_tab3 PARTITION (ds='2008-04-09') REBUILD PREHOOK: type: ALTERINDEX_REBUILD PREHOOK: Input: db5@part_tab3 +PREHOOK: Input: db5@part_tab3@ds=2007-04-09 PREHOOK: Input: db5@part_tab3@ds=2008-04-09 +PREHOOK: Input: db5@part_tab3@ds=2009-04-09 PREHOOK: Output: db5@db5__part_tab3_idx5__@ds=2008-04-09 POSTHOOK: query: ALTER INDEX idx5 ON part_tab3 PARTITION (ds='2008-04-09') REBUILD POSTHOOK: type: ALTERINDEX_REBUILD POSTHOOK: Input: db5@part_tab3 +POSTHOOK: Input: db5@part_tab3@ds=2007-04-09 POSTHOOK: Input: db5@part_tab3@ds=2008-04-09 +POSTHOOK: Input: db5@part_tab3@ds=2009-04-09 POSTHOOK: Output: db5@db5__part_tab3_idx5__@ds=2008-04-09 POSTHOOK: Lineage: db5__part_tab3_idx5__ PARTITION(ds=2008-04-09)._bucketname SIMPLE [(part_tab3)part_tab3.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__part_tab3_idx5__ PARTITION(ds=2008-04-09)._offsets EXPRESSION [(part_tab3)part_tab3.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -322,11 +334,15 @@ PREHOOK: query: ALTER INDEX idx5 ON part_tab3 PARTITION (ds='2009-04-09') REBUILD PREHOOK: type: ALTERINDEX_REBUILD PREHOOK: Input: db5@part_tab3 +PREHOOK: Input: db5@part_tab3@ds=2007-04-09 +PREHOOK: Input: db5@part_tab3@ds=2008-04-09 PREHOOK: Input: db5@part_tab3@ds=2009-04-09 PREHOOK: Output: db5@db5__part_tab3_idx5__@ds=2009-04-09 POSTHOOK: query: ALTER INDEX idx5 ON part_tab3 PARTITION (ds='2009-04-09') REBUILD POSTHOOK: type: ALTERINDEX_REBUILD POSTHOOK: Input: db5@part_tab3 +POSTHOOK: Input: db5@part_tab3@ds=2007-04-09 +POSTHOOK: Input: db5@part_tab3@ds=2008-04-09 POSTHOOK: Input: db5@part_tab3@ds=2009-04-09 POSTHOOK: Output: db5@db5__part_tab3_idx5__@ds=2009-04-09 POSTHOOK: Lineage: db5__part_tab3_idx5__ PARTITION(ds=2009-04-09)._bucketname SIMPLE [(part_tab3)part_tab3.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] Index: ql/src/test/results/clientpositive/create_view_partitioned.q.out =================================================================== --- ql/src/test/results/clientpositive/create_view_partitioned.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/create_view_partitioned.q.out (working copy) @@ -103,7 +103,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@vp1 #### A masked pattern was here #### -86 val_86 PREHOOK: query: SELECT key FROM vp1 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -114,7 +113,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@vp1 #### A masked pattern was here #### -86 PREHOOK: query: 
SELECT value FROM vp1 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -125,7 +123,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@vp1 #### A masked pattern was here #### -val_86 PREHOOK: query: ALTER VIEW vp1 ADD PARTITION (value='val_86') PARTITION (value='val_xyz') PREHOOK: type: ALTERTABLE_ADDPARTS @@ -240,7 +237,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@vp1 #### A masked pattern was here #### -86 val_86 PREHOOK: query: -- test a partitioned view on top of an underlying partitioned table, -- but with only a suffix of the partitioning columns CREATE VIEW vp2 @@ -320,14 +316,18 @@ PREHOOK: query: SELECT key FROM vp2 WHERE hr='12' ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@vp2 #### A masked pattern was here #### POSTHOOK: query: SELECT key FROM vp2 WHERE hr='12' ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@vp2 #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/groupby6.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby6.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby6.q.out (working copy) @@ -29,10 +29,10 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(value, 5, 1) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -62,7 +62,7 @@ Group By Operator keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Index: ql/src/test/results/clientpositive/groupby1.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby1.q.out (working copy) @@ -31,14 +31,14 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: $f1 (type: string) Reduce Operator Tree: Group By Operator 
aggregations: sum(VALUE._col0) @@ -68,10 +68,10 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + expressions: UDFToInteger($f0) (type: int), $f1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/cbo_join.q.out =================================================================== --- ql/src/test/results/clientpositive/cbo_join.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/cbo_join.q.out (working copy) @@ -1,4 +1,5 @@ -PREHOOK: query: -- 4. Test Select + Join + TS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6,7 +7,8 @@ PREHOOK: Input: default@cbo_t2 PREHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: -- 4. Test Select + Join + TS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 @@ -122,46 +124,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL 1 1 1 @@ -522,6 +484,46 @@ 1 1 1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -632,8 +634,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -730,6 +730,8 @@ 1 1 1 1 1 1 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -744,8 +746,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -847,6 +847,8 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -861,10 +863,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -966,6 +964,10 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL +NULL NULL +NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as 
r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5334,8 +5336,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -5870,6 +5870,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6430,8 +6432,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -6966,6 +6966,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: -- 5. Test Select + Join + FIL + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0) PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/groupby4_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby4_noskew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby4_noskew.q.out (working copy) @@ -32,18 +32,18 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(key, 1, 1) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: $f0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Index: ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out (working copy) @@ -123,23 +123,27 @@ Map Operator Tree: TableScan alias: src - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic 
stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col1), sum(_col1), sum(DISTINCT _col1), count(DISTINCT _col2) - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + aggregations: count(DISTINCT $f1), sum($f1), sum(DISTINCT $f1), count(DISTINCT $f2) + keys: $f0 (type: string), $f1 (type: string), $f2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col4 (type: double) auto parallelism: false @@ -238,27 +242,125 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + /srcpart/ds=2008-04-09/hr=11 [src] + /srcpart/ds=2008-04-09/hr=12 [src] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(DISTINCT KEY._col1:2._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2, $f3, $f4 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string), $f3 (type: double), $f4 (type: bigint) + outputColumnNames: _o__c0, _o__c1, _o__c2, _o__c3, _o__c4 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string), UDFToInteger(_o__c3) (type: int), UDFToInteger(_o__c4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -315,6 +417,8 @@ PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 POSTHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 @@ -325,11 +429,13 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 EXPRESSION 
[(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.null, ] -POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY @@ -339,13 +445,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 1 00.0 0 1 -1 71 132828.0 10044 71 -2 69 251142.0 15780 69 -3 62 364008.0 20119 62 -4 74 4105526.0 30965 74 -5 6 5794.0 278 6 -6 5 6796.0 331 5 -7 6 71470.0 447 6 -8 8 81524.0 595 8 -9 7 92094.0 577 7 Index: ql/src/test/results/clientpositive/alias_casted_column.q.out =================================================================== --- ql/src/test/results/clientpositive/alias_casted_column.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/alias_casted_column.q.out (working copy) @@ -17,7 +17,7 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(key) (type: int) - outputColumnNames: _col0 + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -52,7 +52,7 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(key) (type: int) - outputColumnNames: _col0 + outputColumnNames: key2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Index: ql/src/test/results/clientpositive/tez/explainuser_1.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out (working copy) @@ -3665,7 +3665,7 @@ outputColumnNames:["_col0"] Statistics:Num rows: 13 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator [FIL_26] - predicate:_wcol0 is not null (type: boolean) + predicate:first_value_window_0 is not null (type: boolean) Statistics:Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator [PTF_11] Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}] @@ -7596,9 +7596,9 @@ Map-reduce partition columns:_col2 (type: string) sort order:++ Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - value expressions:_wcol0 (type: bigint), _col5 (type: int) + value expressions:sum_window_0 (type: bigint), _col5 (type: int) Select Operator [SEL_13] - outputColumnNames:["_col1","_col2","_col5","_wcol0"] + outputColumnNames:["_col1","_col2","_col5","sum_window_0"] Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE PTF Operator [PTF_12] Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition 
by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}] Index: ql/src/test/results/clientpositive/tez/ptf_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/ptf_streaming.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/tez/ptf_streaming.q.out (working copy) @@ -93,28 +93,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -615,7 +615,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -623,7 +623,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -788,28 +788,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: 
_col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1021,28 +1021,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1256,28 +1256,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1491,28 +1491,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function 
definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1705,33 +1705,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1739,7 +1739,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1980,28 +1980,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2246,28 +2246,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2496,28 +2496,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/tez/explainuser_2.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/explainuser_2.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out (working copy) @@ -53,11 +53,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@ss POSTHOOK: Lineage: ss.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss.v1 SIMPLE 
[(src1)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: INSERT OVERWRITE TABLE sr SELECT x.key,x.value,y.key,y.value,z.key,z.value FROM src1 x @@ -81,11 +81,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@sr POSTHOOK: Lineage: sr.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sr.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sr.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: sr.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: sr.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: sr.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: INSERT OVERWRITE TABLE cs SELECT x.key,x.value,y.key,y.value,z.key,z.value FROM src1 x @@ -195,7 +195,7 @@ Merge Join Operator [MERGEJOIN_29] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col5"] + | outputColumnNames:["_col0","_col4","_col5"] | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] | Reduce Output Operator [RS_14] @@ -203,15 +203,14 @@ | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_1] - | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_25] - | predicate:key is not null (type: boolean) + | predicate:value is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_0] - | alias:y + | alias:z | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Reducer 4 [SIMPLE_EDGE] Reduce Output Operator [RS_16] @@ -219,11 +218,11 @@ Map-reduce partition columns:_col3 (type: string) sort order:+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions:_col0 (type: string) + value expressions:_col1 (type: string), _col2 (type: string) Merge Join Operator [MERGEJOIN_28] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col0","_col3"] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | 
outputColumnNames:["_col1","_col2","_col3"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE |<-Map 3 [SIMPLE_EDGE] | Reduce Output Operator [RS_8] @@ -231,27 +230,28 @@ | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) | Select Operator [SEL_4] - | outputColumnNames:["_col0"] + | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_26] - | predicate:value is not null (type: boolean) + | predicate:key is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:z + | TableScan [TS_3] + | alias:y | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Map 5 [SIMPLE_EDGE] Reduce Output Operator [RS_10] - key expressions:_col1 (type: string) - Map-reduce partition columns:_col1 (type: string) + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) sort order:+ Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions:_col0 (type: string) + value expressions:_col1 (type: string) Select Operator [SEL_6] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Filter Operator [FIL_27] - predicate:(value is not null and key is not null) (type: boolean) + predicate:(key is not null and value is not null) (type: boolean) Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE TableScan [TS_5] alias:x @@ -315,21 +315,21 @@ Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 9 <- Reducer 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 + Reducer 6 File Output Operator [FS_71] compressed:false Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE @@ -339,52 +339,164 @@ Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_69] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 4 [SIMPLE_EDGE] + | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] Reduce Output Operator [RS_68] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_66] | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [SIMPLE_EDGE] + | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] Reduce Output Operator [RS_65] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_64] aggregations:["count(_col3)","count(_col4)","count(_col5)"] keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_62] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_113] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator 
[MERGEJOIN_111] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col15 (type: string), _col17 (type: string)","0":"_col1 (type: string), _col3 (type: string)"} - | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] - | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + | keys:{"1":"_col8 (type: string), _col10 (type: string)","0":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 12 [SIMPLE_EDGE] + | Reduce Output Operator [RS_60] + | key expressions:_col8 (type: string), _col10 (type: string) + | Map-reduce partition columns:_col8 (type: string), _col10 (type: string) + | sort order:++ + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col6 (type: string), _col7 (type: string) + | Select Operator [SEL_46] + | outputColumnNames:["_col10","_col6","_col7","_col8"] + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_109] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col6","_col7","_col8","_col10"] + | | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + | |<-Map 11 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_42] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_19] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_101] + | | predicate:((key = 'src1key') and value is not null) (type: boolean) + | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_17] + | | alias:src1 + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 14 [SIMPLE_EDGE] + | Reduce Output Operator [RS_44] + | key expressions:_col5 (type: string) + | Map-reduce partition columns:_col5 (type: string) + | sort order:+ + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col4 (type: string), _col6 (type: string), _col8 (type: string) + | Merge Join Operator [MERGEJOIN_108] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col4","_col5","_col6","_col8"] + | | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + | |<-Map 13 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_22] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_102] + | | predicate:((value = 'd1value') and key is not null) (type: boolean) + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_20] + | | alias:d1 + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + | |<-Reducer 16 [SIMPLE_EDGE] + | Reduce Output Operator [RS_38] + | key expressions:_col2 (type: string) + | Map-reduce partition columns:_col2 (type: string) + | sort order:+ + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) + | Merge Join Operator [MERGEJOIN_107] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col4","_col6"] + | | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + | |<-Map 15 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_30] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_25] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_103] + | | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_23] + | | alias:srcpart + | | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + | |<-Map 17 [SIMPLE_EDGE] + | Reduce Output Operator [RS_32] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 (type: string) + | sort order:+ + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + | Select Operator [SEL_28] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_104] + | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_26] + | alias:ss + | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_58] + key expressions:_col8 (type: string), _col10 (type: string) + Map-reduce partition columns:_col8 (type: string), _col10 (type: string) + sort order:++ + Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string) + Merge Join Operator [MERGEJOIN_110] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col3 (type: string), _col5 (type: string)","0":"_col1 (type: string), _col3 (type: string)"} + | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"] + | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_58] + | Reduce Output Operator [RS_53] | key expressions:_col1 (type: string), _col3 (type: string) | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) | sort order:++ | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE | value expressions:_col2 (type: string) - | Merge Join Operator [MERGEJOIN_107] + | Merge Join Operator [MERGEJOIN_105] | | condition 
map:[{"":"Inner Join 0 to 1"}] | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col2","_col3"] | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE | |<-Map 1 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_53] + | | Reduce Output Operator [RS_48] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ @@ -393,14 +505,14 @@ | | Select Operator [SEL_1] | | outputColumnNames:["_col0","_col1","_col2","_col3"] | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_99] + | | Filter Operator [FIL_97] | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_0] | | alias:cs | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE - | |<-Map 6 [SIMPLE_EDGE] - | Reduce Output Operator [RS_55] + | |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_50] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ @@ -408,111 +520,26 @@ | Select Operator [SEL_4] | outputColumnNames:["_col0"] | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] + | Filter Operator [FIL_98] | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_2] | alias:d1 | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Reducer 9 [SIMPLE_EDGE] - Reduce Output Operator [RS_60] - key expressions:_col15 (type: string), _col17 (type: string) - Map-reduce partition columns:_col15 (type: string), _col17 (type: string) + Reduce Output Operator [RS_55] + key expressions:_col3 (type: string), _col5 (type: string) + Map-reduce partition columns:_col3 (type: string), _col5 (type: string) sort order:++ - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - value expressions:_col6 (type: string), _col7 (type: string), _col14 (type: string) - Select Operator [SEL_51] - outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_112] + Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: string), _col4 (type: string) + Merge Join Operator [MERGEJOIN_106] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col2 (type: string), _col4 (type: string)","0":"_col8 (type: string), _col10 (type: string)"} - | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 16 [SIMPLE_EDGE] - | Reduce Output Operator [RS_49] - | key expressions:_col2 (type: string), _col4 (type: string) - | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) - | sort order:++ + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col2","_col3","_col4","_col5"] | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col3 (type: string), _col5 (type: string) - | Merge Join Operator [MERGEJOIN_111] - | | condition map:[{"":"Inner Join 0 
to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 15 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_36] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - | | Select Operator [SEL_31] - | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_105] - | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_29] - | | alias:sr - | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | |<-Map 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_38] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_34] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_106] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_32] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_47] - key expressions:_col8 (type: string), _col10 (type: string) - Map-reduce partition columns:_col8 (type: string), _col10 (type: string) - sort order:++ - Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - value expressions:_col6 (type: string), _col7 (type: string) - Merge Join Operator [MERGEJOIN_110] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"} - | outputColumnNames:["_col6","_col7","_col8","_col10"] - | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - |<-Map 7 [SIMPLE_EDGE] - | Reduce Output Operator [RS_42] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_7] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_101] - | predicate:((key = 'src1key') and value is not null) (type: boolean) - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_5] - | alias:src1 - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 11 [SIMPLE_EDGE] - Reduce Output Operator [RS_44] - key expressions:_col5 (type: string) - Map-reduce partition columns:_col5 (type: string) - sort order:+ - Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions:_col4 (type: 
string), _col6 (type: string), _col8 (type: string) - Merge Join Operator [MERGEJOIN_109] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col4","_col5","_col6","_col8"] - | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_24] + | Reduce Output Operator [RS_14] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ @@ -520,54 +547,27 @@ | Select Operator [SEL_10] | outputColumnNames:["_col0"] | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_102] - | predicate:((value = 'd1value') and key is not null) (type: boolean) + | Filter Operator [FIL_100] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_8] | alias:d1 | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 13 [SIMPLE_EDGE] - Reduce Output Operator [RS_26] - key expressions:_col2 (type: string) - Map-reduce partition columns:_col2 (type: string) + |<-Map 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) - Merge Join Operator [MERGEJOIN_108] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col6"] - | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - |<-Map 12 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_13] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_103] - | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_11] - | alias:srcpart - | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - |<-Map 14 [SIMPLE_EDGE] - Reduce Output Operator [RS_20] - key expressions:_col3 (type: string) - Map-reduce partition columns:_col3 (type: string) - sort order:+ Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - Select Operator [SEL_16] - outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Select Operator [SEL_7] + outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_104] - predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + Filter Operator [FIL_99] + 
predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean) Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_14] - alias:ss + TableScan [TS_5] + alias:sr Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -590,33 +590,33 @@ Plan optimized by CBO. Vertex dependency in root stage -Reducer 13 <- Union 12 (SIMPLE_EDGE) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS) -Map 11 <- Union 12 (CONTAINS) -Map 1 <- Union 2 (CONTAINS) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) -Map 7 <- Union 2 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE) -Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Map 15 <- Union 12 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 13 <- Union 14 (CONTAINS) +Map 5 <- Union 6 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Map 9 <- Union 6 (CONTAINS) +Reducer 8 <- Map 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 15 <- Union 14 (SIMPLE_EDGE) +Reducer 16 <- Map 18 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Map 17 <- Union 14 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 + Reducer 4 File Output Operator [FS_61] compressed:false - Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Group By Operator [GBY_59] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - |<-Union 5 [SIMPLE_EDGE] - |<-Reducer 14 [CONTAINS] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Union 3 [SIMPLE_EDGE] + |<-Reducer 2 [CONTAINS] | Reduce Output Operator [RS_58] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -624,196 +624,196 @@ | Group By Operator [GBY_57] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_53] + | Select Operator [SEL_26] | outputColumnNames:["_col0","_col1"] | Merge Join Operator [MERGEJOIN_85] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col2"] - | |<-Reducer 13 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_49] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_37] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_36] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | 
outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 12 [SIMPLE_EDGE] - | | |<-Map 11 [CONTAINS] - | | | Reduce Output Operator [RS_35] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_34] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_28] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_78] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_27] - | | | alias:x - | | |<-Map 15 [CONTAINS] - | | Reduce Output Operator [RS_35] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_34] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_30] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_79] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_29] - | | alias:y - | |<-Reducer 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_51] - | key expressions:_col2 (type: string) - | Map-reduce partition columns:_col2 (type: string) - | sort order:+ - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Merge Join Operator [MERGEJOIN_83] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col2"] - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Map 16 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_43] + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_22] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_39] + | | Select Operator [SEL_1] | | outputColumnNames:["_col0"] | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_80] + | | Filter Operator [FIL_76] | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_38] + | | TableScan [TS_0] | | alias:y | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Map 18 [SIMPLE_EDGE] - | Reduce Output Operator [RS_45] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | |<-Reducer 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_41] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_81] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE 
Column stats: NONE - | TableScan [TS_40] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_58] - key expressions:_col0 (type: string), _col1 (type: string) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - sort order:++ - Group By Operator [GBY_57] - keys:_col0 (type: string), _col1 (type: string) - outputColumnNames:["_col0","_col1"] - Select Operator [SEL_26] - outputColumnNames:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_84] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col0","_col2"] - |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Merge Join Operator [MERGEJOIN_84] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 10 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_14] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_79] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_13] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_16] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] + | Select Operator [SEL_12] | outputColumnNames:["_col0"] | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] + | Group By Operator [GBY_11] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] + | |<-Union 6 [SIMPLE_EDGE] + | |<-Map 5 [CONTAINS] + | | Reduce Output Operator [RS_10] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_7] + | | Group By Operator [GBY_9] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] + | | Select Operator [SEL_3] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_74] + | | Filter Operator [FIL_77] | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] + | | TableScan [TS_2] | | alias:x - | |<-Map 7 [CONTAINS] - | Reduce Output Operator [RS_8] + | |<-Map 9 [CONTAINS] + | Reduce Output Operator [RS_10] | key expressions:_col0 (type: 
string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_7] + | Group By Operator [GBY_9] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] + | Select Operator [SEL_5] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_75] + | Filter Operator [FIL_78] | predicate:value is not null (type: boolean) - | TableScan [TS_2] + | TableScan [TS_4] | alias:y - |<-Reducer 9 [SIMPLE_EDGE] - Reduce Output Operator [RS_24] - key expressions:_col2 (type: string) - Map-reduce partition columns:_col2 (type: string) - sort order:+ - Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string) - Merge Join Operator [MERGEJOIN_82] + |<-Reducer 12 [CONTAINS] + Reduce Output Operator [RS_58] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_57] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_53] + outputColumnNames:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_87] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} | outputColumnNames:["_col1","_col2"] - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] + |<-Map 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_49] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_28] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_80] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_27] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 16 [SIMPLE_EDGE] + Reduce Output Operator [RS_51] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string) + Merge Join Operator [MERGEJOIN_86] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + |<-Map 18 [SIMPLE_EDGE] + | Reduce Output Operator [RS_45] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_14] + | value expressions:_col0 (type: string) + | Select Operator [SEL_41] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_77] - | predicate:(key is not null and value is not null) (type: boolean) + | Filter Operator [FIL_83] + | predicate:(value is not 
null and key is not null) (type: boolean) | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_13] + | TableScan [TS_40] | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_16] + |<-Reducer 15 [SIMPLE_EDGE] + Reduce Output Operator [RS_43] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_12] + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_39] outputColumnNames:["_col0"] - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_76] - predicate:key is not null (type: boolean) - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_11] + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_38] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + |<-Union 14 [SIMPLE_EDGE] + |<-Map 13 [CONTAINS] + | Reduce Output Operator [RS_37] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_36] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_30] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_81] + | predicate:value is not null (type: boolean) + | TableScan [TS_29] + | alias:x + |<-Map 17 [CONTAINS] + Reduce Output Operator [RS_37] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_36] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_32] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_82] + predicate:value is not null (type: boolean) + TableScan [TS_31] alias:y - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -843,45 +843,45 @@ Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Map 24 <- Union 25 (CONTAINS) -Map 32 <- Union 25 (CONTAINS) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 30 <- Union 29 (SIMPLE_EDGE) -Map 13 <- Union 14 (CONTAINS) -Map 34 <- Union 29 (CONTAINS) -Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) -Map 1 <- Union 2 (CONTAINS) -Map 20 <- Union 16 (CONTAINS) -Map 33 <- Union 27 (CONTAINS) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Map 19 <- Union 14 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS) -Reducer 17 <- Union 16 (SIMPLE_EDGE) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 32 <- Union 31 (SIMPLE_EDGE) +Map 11 <- Union 8 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 25 <- Map 24 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 22 <- Union 18 (CONTAINS) +Map 21 <- Union 16 (CONTAINS) +Map 34 <- Union 27 (CONTAINS) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 33 <- Map 37 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Map 36 <- Union 31 (CONTAINS) +Map 35 <- Union 29 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 19 <- Union 18 (SIMPLE_EDGE) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 9 <- Union 8 (SIMPLE_EDGE) +Reducer 17 <- Union 16 (SIMPLE_EDGE), Union 18 (CONTAINS) +Map 15 <- Union 16 (CONTAINS) Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) -Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Map 9 <- Union 2 (CONTAINS) +Map 26 <- Union 27 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 7 <- Union 8 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 + Reducer 6 File Output Operator [FS_122] compressed:false - Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Group By Operator [GBY_120] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - |<-Union 7 [SIMPLE_EDGE] - |<-Reducer 31 [CONTAINS] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 25 [CONTAINS] | Reduce Output Operator [RS_119] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -891,149 +891,148 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_114] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_164] + | Merge Join Operator [MERGEJOIN_170] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | | 
outputColumnNames:["_col2","_col3"] - | |<-Reducer 30 [SIMPLE_EDGE] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 24 [SIMPLE_EDGE] | | Reduce Output Operator [RS_110] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_71] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_159] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_70] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 33 [SIMPLE_EDGE] + | Reduce Output Operator [RS_112] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_169] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | |<-Map 37 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_106] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_102] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_164] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_101] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 32 [SIMPLE_EDGE] + | Reduce Output Operator [RS_104] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_100] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_99] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_98] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | |<-Union 31 [SIMPLE_EDGE] + | |<-Reducer 30 [CONTAINS] + | | Reduce Output Operator [RS_98] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ | | Group By Operator [GBY_97] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Group By Operator [GBY_90] | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | 
outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE | | |<-Union 29 [SIMPLE_EDGE] - | | |<-Map 34 [CONTAINS] - | | | Reduce Output Operator [RS_96] + | | |<-Map 35 [CONTAINS] + | | | Reduce Output Operator [RS_89] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_95] + | | | Group By Operator [GBY_88] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_91] + | | | Select Operator [SEL_84] | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_156] + | | | Filter Operator [FIL_162] | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_90] + | | | TableScan [TS_83] | | | alias:y | | |<-Reducer 28 [CONTAINS] - | | Reduce Output Operator [RS_96] + | | Reduce Output Operator [RS_89] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_95] + | | Group By Operator [GBY_88] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Group By Operator [GBY_88] + | | Group By Operator [GBY_81] | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | outputColumnNames:["_col0","_col1"] | | |<-Union 27 [SIMPLE_EDGE] - | | |<-Map 33 [CONTAINS] - | | | Reduce Output Operator [RS_87] + | | |<-Map 34 [CONTAINS] + | | | Reduce Output Operator [RS_80] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_86] + | | | Group By Operator [GBY_79] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_82] + | | | Select Operator [SEL_75] | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_155] + | | | Filter Operator [FIL_161] | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_81] + | | | TableScan [TS_74] | | | alias:y - | | |<-Reducer 26 [CONTAINS] - | | Reduce Output Operator [RS_87] + | | |<-Map 26 [CONTAINS] + | | Reduce Output Operator [RS_80] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_86] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] | | Group By Operator [GBY_79] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | |<-Union 25 [SIMPLE_EDGE] - | | |<-Map 24 [CONTAINS] - | | | Reduce Output Operator [RS_78] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_77] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_71] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_153] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_70] - | | | alias:x - | | |<-Map 32 [CONTAINS] - | | Reduce Output Operator [RS_78] - | | key expressions:_col0 (type: string), 
_col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_77] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] | | Select Operator [SEL_73] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_154] + | | Filter Operator [FIL_160] | | predicate:value is not null (type: boolean) | | TableScan [TS_72] - | | alias:y - | |<-Reducer 36 [SIMPLE_EDGE] - | Reduce Output Operator [RS_112] - | key expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) - | sort order:+ - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string), _col2 (type: string) - | Merge Join Operator [MERGEJOIN_161] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Map 35 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_104] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_100] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_157] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_99] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Map 37 [SIMPLE_EDGE] - | Reduce Output Operator [RS_106] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_102] + | | alias:x + | |<-Map 36 [CONTAINS] + | Reduce Output Operator [RS_98] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_97] + | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_158] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_101] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 6 [CONTAINS] + | Select Operator [SEL_93] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_163] + | predicate:value is not null (type: boolean) + | TableScan [TS_92] + | alias:y + |<-Reducer 4 [CONTAINS] Reduce Output Operator [RS_119] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1044,8 +1043,8 @@ Group By Operator [GBY_68] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - |<-Union 5 [SIMPLE_EDGE] - |<-Reducer 4 [CONTAINS] + |<-Union 3 [SIMPLE_EDGE] + 
|<-Reducer 14 [CONTAINS] | Reduce Output Operator [RS_67] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1053,224 +1052,222 @@ | Group By Operator [GBY_66] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_26] + | Select Operator [SEL_62] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_162] + | Merge Join Operator [MERGEJOIN_168] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Reducer 11 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_24] - | | key expressions:_col3 (type: string) - | | Map-reduce partition columns:_col3 (type: string) - | | sort order:+ - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string) - | | Merge Join Operator [MERGEJOIN_159] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | | outputColumnNames:["_col1","_col2","_col3"] - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 10 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_16] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_12] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_146] - | | | predicate:key is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_11] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 12 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_18] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 13 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_58] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | | value expressions:_col1 (type: string) - | | Select Operator [SEL_14] + | | Select Operator [SEL_28] | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_154] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_27] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 20 [SIMPLE_EDGE] + | Reduce Output Operator [RS_60] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_167] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: 
string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | |<-Map 23 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_54] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_147] - | | predicate:(key is not null and value is not null) (type: boolean) + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_50] + | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_13] + | | Filter Operator [FIL_158] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_49] | | alias:x | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] + | |<-Reducer 19 [SIMPLE_EDGE] + | Reduce Output Operator [RS_52] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_144] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 9 [CONTAINS] - | Reduce Output Operator [RS_8] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_145] - | predicate:value is not null (type: boolean) - | TableScan [TS_2] - | alias:y - |<-Reducer 18 [CONTAINS] - Reduce Output Operator [RS_67] - key expressions:_col0 (type: string), _col1 (type: string) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - sort order:++ - Group By Operator [GBY_66] - keys:_col0 (type: string), _col1 (type: string) - outputColumnNames:["_col0","_col1"] - Select Operator [SEL_62] - outputColumnNames:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_163] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col2","_col3"] - |<-Reducer 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_58] - | key expressions:_col0 (type: string) - | Map-reduce partition 
columns:_col0 (type: string) - | sort order:+ | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_46] + | Select Operator [SEL_48] | outputColumnNames:["_col0"] | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_45] + | Group By Operator [GBY_47] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | |<-Union 16 [SIMPLE_EDGE] - | |<-Map 20 [CONTAINS] - | | Reduce Output Operator [RS_44] + | |<-Union 18 [SIMPLE_EDGE] + | |<-Map 22 [CONTAINS] + | | Reduce Output Operator [RS_46] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_43] + | | Group By Operator [GBY_45] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_39] + | | Select Operator [SEL_41] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_150] + | | Filter Operator [FIL_157] | | predicate:value is not null (type: boolean) - | | TableScan [TS_38] + | | TableScan [TS_40] | | alias:y - | |<-Reducer 15 [CONTAINS] - | Reduce Output Operator [RS_44] + | |<-Reducer 17 [CONTAINS] + | Reduce Output Operator [RS_46] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_43] + | Group By Operator [GBY_45] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Group By Operator [GBY_36] + | Group By Operator [GBY_38] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | |<-Union 14 [SIMPLE_EDGE] - | |<-Map 13 [CONTAINS] - | | Reduce Output Operator [RS_35] + | |<-Union 16 [SIMPLE_EDGE] + | |<-Map 21 [CONTAINS] + | | Reduce Output Operator [RS_37] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_34] + | | Group By Operator [GBY_36] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_28] + | | Select Operator [SEL_32] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_148] + | | Filter Operator [FIL_156] | | predicate:value is not null (type: boolean) - | | TableScan [TS_27] - | | alias:x - | |<-Map 19 [CONTAINS] - | Reduce Output Operator [RS_35] + | | TableScan [TS_31] + | | alias:y + | |<-Map 15 [CONTAINS] + | Reduce Output Operator [RS_37] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_34] + | Group By Operator [GBY_36] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_30] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_149] + | Filter Operator [FIL_155] | predicate:value is not null (type: boolean) | TableScan [TS_29] - | alias:y - |<-Reducer 22 [SIMPLE_EDGE] - Reduce Output Operator [RS_60] - key expressions:_col3 (type: string) - Map-reduce partition columns:_col3 (type: string) - sort order:+ - Statistics:Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE - value expressions:_col1 (type: string), _col2 (type: string) - Merge Join Operator [MERGEJOIN_160] + | alias:x + |<-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_67] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_66] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_26] + outputColumnNames:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_166] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3"] - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Map 21 [SIMPLE_EDGE] - | Reduce Output Operator [RS_52] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3"] + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_22] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | value expressions:_col1 (type: string) - | Select Operator [SEL_48] + | Select Operator [SEL_1] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_151] + | Filter Operator [FIL_150] | predicate:key is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_47] + | TableScan [TS_0] | alias:y | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 23 [SIMPLE_EDGE] - Reduce Output Operator [RS_54] + |<-Reducer 10 [SIMPLE_EDGE] + Reduce Output Operator [RS_24] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_165] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + |<-Map 12 [SIMPLE_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_14] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_153] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_13] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_16] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string) - Select Operator [SEL_50] + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_12] + outputColumnNames:["_col0"] + 
Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_11] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + |<-Union 8 [SIMPLE_EDGE] + |<-Map 11 [CONTAINS] + | Reduce Output Operator [RS_10] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_9] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_152] + | predicate:value is not null (type: boolean) + | TableScan [TS_4] + | alias:y + |<-Map 7 [CONTAINS] + Reduce Output Operator [RS_10] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_9] + keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_152] - predicate:(key is not null and value is not null) (type: boolean) - Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_49] + Select Operator [SEL_3] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_151] + predicate:value is not null (type: boolean) + TableScan [TS_2] alias:x - Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: EXPLAIN SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -1301,7 +1298,7 @@ Map Join Operator [MAPJOIN_29] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col3 (type: string)"} - | outputColumnNames:["_col1","_col2","_col5"] + | outputColumnNames:["_col0","_col4","_col5"] | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [BROADCAST_EDGE] | Reduce Output Operator [RS_14] @@ -1309,45 +1306,44 @@ | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_1] - | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_25] - | predicate:key is not null (type: boolean) + | predicate:value is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_0] - | alias:y + | alias:z | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Map Join Operator [MAPJOIN_28] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col1 (type: string)"} - | outputColumnNames:["_col0","_col3"] + | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE |<-Map 3 [BROADCAST_EDGE] | Reduce Output Operator [RS_10] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) + | key expressions:_col0 (type: string) + | Map-reduce 
partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string) + | value expressions:_col1 (type: string) | Select Operator [SEL_6] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_27] - | predicate:(value is not null and key is not null) (type: boolean) + | predicate:(key is not null and value is not null) (type: boolean) | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_5] | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE |<-Select Operator [SEL_4] - outputColumnNames:["_col0"] + outputColumnNames:["_col0","_col1"] Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator [FIL_26] - predicate:value is not null (type: boolean) + predicate:key is not null (type: boolean) Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_2] - alias:z + TableScan [TS_3] + alias:y Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: EXPLAIN select @@ -1408,17 +1404,17 @@ Plan optimized by CBO. Vertex dependency in root stage -Map 2 <- Map 1 (BROADCAST_EDGE) -Map 10 <- Map 9 (BROADCAST_EDGE) -Map 5 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) +Map 4 <- Map 3 (BROADCAST_EDGE) +Map 7 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 + Reducer 9 File Output Operator [FS_71] compressed:false Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE @@ -1428,52 +1424,94 @@ Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_69] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 6 [SIMPLE_EDGE] + | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 8 [SIMPLE_EDGE] Reduce Output Operator [RS_68] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_66] | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Map 5 [SIMPLE_EDGE] + | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] Reduce Output Operator [RS_65] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), 
_col2 (type: string) sort order:+++ - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_64] aggregations:["count(_col3)","count(_col4)","count(_col5)"] keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_62] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_113] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_111] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 5":"_col15 (type: string), _col17 (type: string)"} - | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] - | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + | keys:{"Map 2":"_col8 (type: string), _col10 (type: string)","Map 7":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE |<-Map 2 [BROADCAST_EDGE] | Reduce Output Operator [RS_58] - | key expressions:_col1 (type: string), _col3 (type: string) - | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | key expressions:_col8 (type: string), _col10 (type: string) + | Map-reduce partition columns:_col8 (type: string), _col10 (type: string) | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col2 (type: string) - | Map Join Operator [MAPJOIN_107] + | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string) + | Map Join Operator [MAPJOIN_110] | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 4":"_col3 (type: string), _col5 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"] + | | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + | |<-Map 4 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_55] + | | key expressions:_col3 (type: string), _col5 (type: string) + | | Map-reduce partition columns:_col3 (type: string), _col5 (type: string) + | | sort order:++ + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col2 (type: string), _col4 (type: string) + | | Map Join Operator [MAPJOIN_106] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 3":"_col0 (type: string)","Map 4":"_col0 (type: string)"} + | | | outputColumnNames:["_col2","_col3","_col4","_col5"] + | | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 3 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_12] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ 
+ | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | | Select Operator [SEL_7] + | | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_99] + | | | predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_5] + | | | alias:sr + | | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_10] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_100] + | | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_8] + | | alias:d1 + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map Join Operator [MAPJOIN_105] + | | condition map:[{"":"Inner Join 0 to 1"}] | | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col2","_col3"] | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE | |<-Map 1 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_53] + | | Reduce Output Operator [RS_48] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ @@ -1482,7 +1520,7 @@ | | Select Operator [SEL_1] | | outputColumnNames:["_col0","_col1","_col2","_col3"] | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_99] + | | Filter Operator [FIL_97] | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_0] @@ -1491,125 +1529,83 @@ | |<-Select Operator [SEL_4] | outputColumnNames:["_col0"] | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] + | Filter Operator [FIL_98] | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_2] | alias:d1 | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_51] - outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_112] + |<-Select Operator [SEL_46] + outputColumnNames:["_col10","_col6","_col7","_col8"] + Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_109] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 10":"_col2 (type: string), _col4 (type: string)","Map 5":"_col8 (type: string), _col10 (type: string)"} - | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [BROADCAST_EDGE] - | Reduce Output Operator [RS_49] - | 
key expressions:_col2 (type: string), _col4 (type: string) - | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) - | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col3 (type: string), _col5 (type: string) - | Map Join Operator [MAPJOIN_111] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 10":"_col0 (type: string)","Map 9":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 9 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_36] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - | | Select Operator [SEL_31] - | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_105] - | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_29] - | | alias:sr - | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_34] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_106] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_32] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_110] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 3":"_col1 (type: string)","Map 5":"_col5 (type: string)"} + | keys:{"Map 5":"_col1 (type: string)","Map 7":"_col5 (type: string)"} | outputColumnNames:["_col6","_col7","_col8","_col10"] | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - |<-Map 3 [BROADCAST_EDGE] + |<-Map 5 [BROADCAST_EDGE] | Reduce Output Operator [RS_42] | key expressions:_col1 (type: string) | Map-reduce partition columns:_col1 (type: string) | sort order:+ | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_7] + | Select Operator [SEL_19] | outputColumnNames:["_col1"] | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_101] | predicate:((key = 'src1key') and value is not null) (type: boolean) | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_5] + | TableScan [TS_17] | alias:src1 | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_109] + |<-Map Join Operator [MAPJOIN_108] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 5":"_col2 (type: string)","Map 4":"_col0 (type: string)"} + | keys:{"Map 7":"_col2 (type: string)","Map 6":"_col0 (type: string)"} | outputColumnNames:["_col4","_col5","_col6","_col8"] | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE 
Column stats: NONE - |<-Map 4 [BROADCAST_EDGE] - | Reduce Output Operator [RS_24] + |<-Map 6 [BROADCAST_EDGE] + | Reduce Output Operator [RS_36] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] + | Select Operator [SEL_22] | outputColumnNames:["_col0"] | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_102] | predicate:((value = 'd1value') and key is not null) (type: boolean) | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_8] + | TableScan [TS_20] | alias:d1 | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_108] + |<-Map Join Operator [MAPJOIN_107] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 5":"_col1 (type: string)","Map 8":"_col3 (type: string)"} + | keys:{"Map 10":"_col3 (type: string)","Map 7":"_col1 (type: string)"} | outputColumnNames:["_col2","_col3","_col4","_col6"] | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - |<-Map 8 [BROADCAST_EDGE] - | Reduce Output Operator [RS_20] + |<-Map 10 [BROADCAST_EDGE] + | Reduce Output Operator [RS_32] | key expressions:_col3 (type: string) | Map-reduce partition columns:_col3 (type: string) | sort order:+ | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - | Select Operator [SEL_16] + | Select Operator [SEL_28] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_104] | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_14] + | TableScan [TS_26] | alias:ss | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_13] + |<-Select Operator [SEL_25] outputColumnNames:["_col1"] Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator [FIL_103] predicate:((key = 'srcpartkey') and value is not null) (type: boolean) Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_11] + TableScan [TS_23] alias:srcpart Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain @@ -1633,31 +1629,31 @@ Plan optimized by CBO. 
Vertex dependency in root stage -Map 12 <- Union 10 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 11 <- Union 10 (SIMPLE_EDGE) -Map 13 <- Map 14 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Union 6 (CONTAINS) -Map 1 <- Union 2 (CONTAINS) -Map 5 <- Map 8 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS) -Map 4 <- Union 2 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) -Map 9 <- Union 10 (CONTAINS) +Reducer 12 <- Map 14 (BROADCAST_EDGE), Union 11 (SIMPLE_EDGE) +Map 13 <- Union 11 (CONTAINS) +Map 1 <- Reducer 6 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 10 <- Union 11 (CONTAINS) +Map 4 <- Union 5 (CONTAINS) +Map 7 <- Union 5 (CONTAINS) +Reducer 6 <- Map 8 (BROADCAST_EDGE), Union 5 (SIMPLE_EDGE) +Map 9 <- Reducer 12 (BROADCAST_EDGE), Union 2 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 + Reducer 3 File Output Operator [FS_61] compressed:false - Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Group By Operator [GBY_59] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - |<-Union 6 [SIMPLE_EDGE] - |<-Map 13 [CONTAINS] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] | Reduce Output Operator [RS_58] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1665,161 +1661,165 @@ | Group By Operator [GBY_57] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_53] + | Select Operator [SEL_26] | outputColumnNames:["_col0","_col1"] | Map Join Operator [MAPJOIN_85] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Reducer 11":"_col0 (type: string)","Map 13":"_col2 (type: string)"} - | | outputColumnNames:["_col0","_col2"] - | |<-Reducer 11 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_49] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | keys:{"Map 1":"_col0 (type: string)","Reducer 6":"_col1 (type: string)"} + | | outputColumnNames:["_col1","_col2"] + | |<-Reducer 6 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_24] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_37] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Map Join Operator [MAPJOIN_84] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Reducer 6":"_col0 (type: string)","Map 8":"_col1 (type: string)"} + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 8 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_18] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: 
COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_14] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_79] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_13] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_12] | | outputColumnNames:["_col0"] | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_36] + | | Group By Operator [GBY_11] | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | outputColumnNames:["_col0","_col1"] | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 10 [SIMPLE_EDGE] - | | |<-Map 12 [CONTAINS] - | | | Reduce Output Operator [RS_35] + | | |<-Union 5 [SIMPLE_EDGE] + | | |<-Map 4 [CONTAINS] + | | | Reduce Output Operator [RS_10] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_34] + | | | Group By Operator [GBY_9] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_30] + | | | Select Operator [SEL_3] | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_79] + | | | Filter Operator [FIL_77] | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_29] - | | | alias:y - | | |<-Map 9 [CONTAINS] - | | Reduce Output Operator [RS_35] + | | | TableScan [TS_2] + | | | alias:x + | | |<-Map 7 [CONTAINS] + | | Reduce Output Operator [RS_10] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_34] + | | Group By Operator [GBY_9] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_28] + | | Select Operator [SEL_5] | | outputColumnNames:["_col0","_col1"] | | Filter Operator [FIL_78] | | predicate:value is not null (type: boolean) - | | TableScan [TS_27] - | | alias:x - | |<-Map Join Operator [MAPJOIN_83] + | | TableScan [TS_4] + | | alias:y + | |<-Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_76] + | predicate:key is not null (type: boolean) + | TableScan [TS_0] + | alias:y + |<-Map 9 [CONTAINS] + Reduce Output Operator [RS_58] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_57] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_53] + outputColumnNames:["_col0","_col1"] + Map Join Operator [MAPJOIN_87] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 12":"_col1 (type: string)","Map 9":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2"] + |<-Reducer 12 [BROADCAST_EDGE] + | Reduce Output Operator [RS_51] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | 
value expressions:_col0 (type: string) + | Map Join Operator [MAPJOIN_86] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 14":"_col0 (type: string)","Map 13":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2"] + | | keys:{"Map 14":"_col1 (type: string)","Reducer 12":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE | |<-Map 14 [BROADCAST_EDGE] | | Reduce Output Operator [RS_45] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) + | | value expressions:_col0 (type: string) | | Select Operator [SEL_41] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_81] - | | predicate:(key is not null and value is not null) (type: boolean) + | | Filter Operator [FIL_83] + | | predicate:(value is not null and key is not null) (type: boolean) | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_40] | | alias:x | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE | |<-Select Operator [SEL_39] | outputColumnNames:["_col0"] - | Filter Operator [FIL_80] - | predicate:key is not null (type: boolean) - | TableScan [TS_38] - | alias:y - |<-Map 5 [CONTAINS] - Reduce Output Operator [RS_58] - key expressions:_col0 (type: string), _col1 (type: string) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - sort order:++ - Group By Operator [GBY_57] - keys:_col0 (type: string), _col1 (type: string) - outputColumnNames:["_col0","_col1"] - Select Operator [SEL_26] - outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_84] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col2 (type: string)"} - | outputColumnNames:["_col0","_col2"] - |<-Reducer 3 [BROADCAST_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] + | Group By Operator [GBY_38] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] + | |<-Union 11 [SIMPLE_EDGE] + | |<-Map 13 [CONTAINS] + | | Reduce Output Operator [RS_37] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_7] + | | Group By Operator [GBY_36] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] + | | Select Operator [SEL_32] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_74] + | | Filter Operator [FIL_82] | | predicate:value is not 
null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 4 [CONTAINS] - | Reduce Output Operator [RS_8] + | | TableScan [TS_31] + | | alias:y + | |<-Map 10 [CONTAINS] + | Reduce Output Operator [RS_37] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_7] + | Group By Operator [GBY_36] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] + | Select Operator [SEL_30] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_75] + | Filter Operator [FIL_81] | predicate:value is not null (type: boolean) - | TableScan [TS_2] - | alias:y - |<-Map Join Operator [MAPJOIN_82] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 5":"_col0 (type: string)","Map 8":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2"] - |<-Map 8 [BROADCAST_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_14] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_77] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_13] + | TableScan [TS_29] | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_12] + |<-Select Operator [SEL_28] outputColumnNames:["_col0"] - Filter Operator [FIL_76] + Filter Operator [FIL_80] predicate:key is not null (type: boolean) - TableScan [TS_11] + TableScan [TS_27] alias:y PREHOOK: query: explain SELECT x.key, y.value @@ -1850,42 +1850,42 @@ Plan optimized by CBO. 
Vertex dependency in root stage
-Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS)
-Reducer 13 <- Union 12 (SIMPLE_EDGE), Union 14 (CONTAINS)
-Map 30 <- Map 31 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 8 (CONTAINS)
-Map 11 <- Union 12 (CONTAINS)
-Reducer 24 <- Union 23 (SIMPLE_EDGE), Union 25 (CONTAINS)
-Map 1 <- Union 2 (CONTAINS)
-Map 20 <- Union 21 (CONTAINS)
-Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS)
-Reducer 9 <- Union 8 (SIMPLE_EDGE)
-Reducer 26 <- Union 25 (SIMPLE_EDGE)
-Map 16 <- Union 12 (CONTAINS)
-Map 29 <- Union 25 (CONTAINS)
-Map 28 <- Union 23 (CONTAINS)
-Reducer 15 <- Union 14 (SIMPLE_EDGE)
-Map 18 <- Map 19 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Union 6 (CONTAINS)
-Map 27 <- Union 21 (CONTAINS)
-Map 17 <- Union 14 (CONTAINS)
-Reducer 3 <- Union 2 (SIMPLE_EDGE)
-Map 5 <- Map 10 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS)
-Map 4 <- Union 2 (CONTAINS)
+Map 12 <- Union 13 (CONTAINS)
+Map 30 <- Union 26 (CONTAINS)
+Reducer 23 <- Union 22 (SIMPLE_EDGE), Union 24 (CONTAINS)
+Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS)
+Map 11 <- Reducer 16 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS)
+Map 21 <- Union 22 (CONTAINS)
+Map 1 <- Reducer 8 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Map 20 <- Reducer 27 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Reducer 5 <- Union 4 (SIMPLE_EDGE)
+Map 29 <- Union 24 (CONTAINS)
+Reducer 8 <- Map 10 (BROADCAST_EDGE), Union 7 (SIMPLE_EDGE)
+Reducer 27 <- Map 31 (BROADCAST_EDGE), Union 26 (SIMPLE_EDGE)
+Map 28 <- Union 22 (CONTAINS)
+Map 18 <- Union 15 (CONTAINS)
+Reducer 16 <- Map 19 (BROADCAST_EDGE), Union 15 (SIMPLE_EDGE)
+Map 17 <- Union 13 (CONTAINS)
+Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+Map 6 <- Union 7 (CONTAINS)
+Map 9 <- Union 7 (CONTAINS)
 Stage-0
 Fetch Operator
 limit:-1
 Stage-1
- Reducer 9
+ Reducer 5
 File Output Operator [FS_122]
 compressed:false
- Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
 table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
 Group By Operator [GBY_120]
 | keys:KEY._col0 (type: string), KEY._col1 (type: string)
 | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
- |<-Union 8 [SIMPLE_EDGE]
- |<-Map 30 [CONTAINS]
+ | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 4 [SIMPLE_EDGE]
+ |<-Map 20 [CONTAINS]
 | Reduce Output Operator [RS_119]
 | key expressions:_col0 (type: string), _col1 (type: string)
 | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -1895,131 +1895,132 @@
 | outputColumnNames:["_col0","_col1"]
 | Select Operator [SEL_114]
 | outputColumnNames:["_col0","_col1"]
- | Map Join Operator [MAPJOIN_164]
+ | Map Join Operator [MAPJOIN_170]
 | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 30":"_col3 (type: string)","Reducer 26":"_col0 (type: string)"}
- | | outputColumnNames:["_col2","_col3"]
- | |<-Reducer 26 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_110]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
+ | | keys:{"Map 20":"_col0 (type: string)","Reducer 27":"_col1 (type: string)"}
+ | |
outputColumnNames:["_col1","_col3"] + | |<-Reducer 27 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_112] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_98] + | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | | Map Join Operator [MAPJOIN_169] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 31":"_col1 (type: string)","Reducer 27":"_col0 (type: string)"} + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 31 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_106] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_102] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_164] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_101] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_100] | | outputColumnNames:["_col0"] | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_97] + | | Group By Operator [GBY_99] | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | outputColumnNames:["_col0","_col1"] | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 25 [SIMPLE_EDGE] - | | |<-Reducer 24 [CONTAINS] - | | | Reduce Output Operator [RS_96] + | | |<-Union 26 [SIMPLE_EDGE] + | | |<-Map 30 [CONTAINS] + | | | Reduce Output Operator [RS_98] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_95] + | | | Group By Operator [GBY_97] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_93] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_163] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_92] + | | | alias:y + | | |<-Reducer 25 [CONTAINS] + | | Reduce Output Operator [RS_98] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_97] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Group By Operator [GBY_90] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | |<-Union 24 [SIMPLE_EDGE] + | | |<-Reducer 23 [CONTAINS] + | | | Reduce Output Operator [RS_89] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ | | | Group By Operator [GBY_88] + | | | keys:_col0 (type: string), _col1 (type: string) + 
| | | outputColumnNames:["_col0","_col1"] + | | | Group By Operator [GBY_81] | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | | outputColumnNames:["_col0","_col1"] - | | | |<-Union 23 [SIMPLE_EDGE] - | | | |<-Reducer 22 [CONTAINS] - | | | | Reduce Output Operator [RS_87] + | | | |<-Union 22 [SIMPLE_EDGE] + | | | |<-Map 21 [CONTAINS] + | | | | Reduce Output Operator [RS_80] | | | | key expressions:_col0 (type: string), _col1 (type: string) | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | | sort order:++ - | | | | Group By Operator [GBY_86] - | | | | keys:_col0 (type: string), _col1 (type: string) - | | | | outputColumnNames:["_col0","_col1"] | | | | Group By Operator [GBY_79] - | | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | | | outputColumnNames:["_col0","_col1"] - | | | | |<-Union 21 [SIMPLE_EDGE] - | | | | |<-Map 20 [CONTAINS] - | | | | | Reduce Output Operator [RS_78] - | | | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | | | sort order:++ - | | | | | Group By Operator [GBY_77] - | | | | | keys:_col0 (type: string), _col1 (type: string) - | | | | | outputColumnNames:["_col0","_col1"] - | | | | | Select Operator [SEL_71] - | | | | | outputColumnNames:["_col0","_col1"] - | | | | | Filter Operator [FIL_153] - | | | | | predicate:value is not null (type: boolean) - | | | | | TableScan [TS_70] - | | | | | alias:x - | | | | |<-Map 27 [CONTAINS] - | | | | Reduce Output Operator [RS_78] - | | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | | sort order:++ - | | | | Group By Operator [GBY_77] | | | | keys:_col0 (type: string), _col1 (type: string) | | | | outputColumnNames:["_col0","_col1"] | | | | Select Operator [SEL_73] | | | | outputColumnNames:["_col0","_col1"] - | | | | Filter Operator [FIL_154] + | | | | Filter Operator [FIL_160] | | | | predicate:value is not null (type: boolean) | | | | TableScan [TS_72] - | | | | alias:y + | | | | alias:x | | | |<-Map 28 [CONTAINS] - | | | Reduce Output Operator [RS_87] + | | | Reduce Output Operator [RS_80] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_86] + | | | Group By Operator [GBY_79] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_82] + | | | Select Operator [SEL_75] | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_155] + | | | Filter Operator [FIL_161] | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_81] + | | | TableScan [TS_74] | | | alias:y | | |<-Map 29 [CONTAINS] - | | Reduce Output Operator [RS_96] + | | Reduce Output Operator [RS_89] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_95] + | | Group By Operator [GBY_88] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_91] + | | Select Operator [SEL_84] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_156] + | | Filter Operator [FIL_162] | | predicate:value is not null (type: boolean) - | | TableScan [TS_90] + | 
| TableScan [TS_83] | | alias:y - | |<-Map Join Operator [MAPJOIN_161] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 30":"_col0 (type: string)","Map 31":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | |<-Map 31 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_106] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_102] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_158] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_101] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_100] + | |<-Select Operator [SEL_71] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_157] + | Filter Operator [FIL_159] | predicate:key is not null (type: boolean) - | TableScan [TS_99] + | TableScan [TS_70] | alias:y - |<-Reducer 7 [CONTAINS] + |<-Reducer 3 [CONTAINS] Reduce Output Operator [RS_119] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2030,8 +2031,8 @@ Group By Operator [GBY_68] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - |<-Union 6 [SIMPLE_EDGE] - |<-Map 18 [CONTAINS] + |<-Union 2 [SIMPLE_EDGE] + |<-Map 11 [CONTAINS] | Reduce Output Operator [RS_67] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2041,105 +2042,106 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_62] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_163] + | Map Join Operator [MAPJOIN_168] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Reducer 15":"_col0 (type: string)","Map 18":"_col3 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Reducer 15 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_58] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | keys:{"Map 11":"_col0 (type: string)","Reducer 16":"_col1 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Reducer 16 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_60] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_46] + | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | Map Join Operator [MAPJOIN_167] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 19":"_col1 (type: string)","Reducer 16":"_col0 (type: string)"} + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 19 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_54] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: 
NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_50] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_158] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_49] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_48] | | outputColumnNames:["_col0"] | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_45] + | | Group By Operator [GBY_47] | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | outputColumnNames:["_col0","_col1"] | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 14 [SIMPLE_EDGE] - | | |<-Reducer 13 [CONTAINS] - | | | Reduce Output Operator [RS_44] + | | |<-Union 15 [SIMPLE_EDGE] + | | |<-Reducer 14 [CONTAINS] + | | | Reduce Output Operator [RS_46] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_43] + | | | Group By Operator [GBY_45] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Group By Operator [GBY_36] + | | | Group By Operator [GBY_38] | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | | outputColumnNames:["_col0","_col1"] - | | | |<-Union 12 [SIMPLE_EDGE] - | | | |<-Map 11 [CONTAINS] - | | | | Reduce Output Operator [RS_35] + | | | |<-Union 13 [SIMPLE_EDGE] + | | | |<-Map 12 [CONTAINS] + | | | | Reduce Output Operator [RS_37] | | | | key expressions:_col0 (type: string), _col1 (type: string) | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | | sort order:++ - | | | | Group By Operator [GBY_34] + | | | | Group By Operator [GBY_36] | | | | keys:_col0 (type: string), _col1 (type: string) | | | | outputColumnNames:["_col0","_col1"] - | | | | Select Operator [SEL_28] + | | | | Select Operator [SEL_30] | | | | outputColumnNames:["_col0","_col1"] - | | | | Filter Operator [FIL_148] + | | | | Filter Operator [FIL_155] | | | | predicate:value is not null (type: boolean) - | | | | TableScan [TS_27] + | | | | TableScan [TS_29] | | | | alias:x - | | | |<-Map 16 [CONTAINS] - | | | Reduce Output Operator [RS_35] + | | | |<-Map 17 [CONTAINS] + | | | Reduce Output Operator [RS_37] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_34] + | | | Group By Operator [GBY_36] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_30] + | | | Select Operator [SEL_32] | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_149] + | | | Filter Operator [FIL_156] | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_29] + | | | TableScan [TS_31] | | | alias:y - | | |<-Map 17 [CONTAINS] - | | Reduce Output Operator [RS_44] + | | |<-Map 18 [CONTAINS] + | | Reduce Output Operator [RS_46] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort 
order:++ - | | Group By Operator [GBY_43] + | | Group By Operator [GBY_45] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_39] + | | Select Operator [SEL_41] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_150] + | | Filter Operator [FIL_157] | | predicate:value is not null (type: boolean) - | | TableScan [TS_38] + | | TableScan [TS_40] | | alias:y - | |<-Map Join Operator [MAPJOIN_160] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 19":"_col0 (type: string)","Map 18":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | |<-Map 19 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_54] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_50] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_152] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_49] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_48] + | |<-Select Operator [SEL_28] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_151] + | Filter Operator [FIL_154] | predicate:key is not null (type: boolean) - | TableScan [TS_47] + | TableScan [TS_27] | alias:y - |<-Map 5 [CONTAINS] + |<-Map 1 [CONTAINS] Reduce Output Operator [RS_67] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2149,77 +2151,78 @@ outputColumnNames:["_col0","_col1"] Select Operator [SEL_26] outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_162] + Map Join Operator [MAPJOIN_166] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col3 (type: string)"} - | outputColumnNames:["_col2","_col3"] - |<-Reducer 3 [BROADCAST_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | keys:{"Map 1":"_col0 (type: string)","Reducer 8":"_col1 (type: string)"} + | outputColumnNames:["_col1","_col3"] + |<-Reducer 8 [BROADCAST_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | Map Join Operator [MAPJOIN_165] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 10":"_col1 (type: string)","Reducer 8":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 10 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_14] + | | 
outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_153] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_13] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_12] | outputColumnNames:["_col0"] | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] + | Group By Operator [GBY_11] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] + | |<-Union 7 [SIMPLE_EDGE] + | |<-Map 6 [CONTAINS] + | | Reduce Output Operator [RS_10] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_7] + | | Group By Operator [GBY_9] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] + | | Select Operator [SEL_3] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_144] + | | Filter Operator [FIL_151] | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] + | | TableScan [TS_2] | | alias:x - | |<-Map 4 [CONTAINS] - | Reduce Output Operator [RS_8] + | |<-Map 9 [CONTAINS] + | Reduce Output Operator [RS_10] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_7] + | Group By Operator [GBY_9] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] + | Select Operator [SEL_5] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_145] + | Filter Operator [FIL_152] | predicate:value is not null (type: boolean) - | TableScan [TS_2] + | TableScan [TS_4] | alias:y - |<-Map Join Operator [MAPJOIN_159] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 10":"_col0 (type: string)","Map 5":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3"] - |<-Map 10 [BROADCAST_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_14] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_147] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_13] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_12] + |<-Select Operator [SEL_1] outputColumnNames:["_col0","_col1"] - Filter Operator [FIL_146] + Filter Operator [FIL_150] predicate:key is not null (type: boolean) - TableScan [TS_11] + TableScan [TS_0] alias:y PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) 
CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -2782,308 +2785,351 @@ Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Union 13 (CONTAINS) -Map 14 <- Union 13 (CONTAINS) -Map 21 <- Map 20 (BROADCAST_EDGE) -Map 1 <- Union 2 (CONTAINS) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 13 (SIMPLE_EDGE), Union 4 (CONTAINS) -Map 19 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Map 16 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 15 <- Union 13 (CONTAINS) -Map 18 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 17 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Map 5 <- Union 2 (CONTAINS) -Map 6 <- Map 7 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 5 (SIMPLE_EDGE) +Map 12 <- Union 9 (CONTAINS) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 13 <- Union 9 (CONTAINS) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) +Map 4 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS) +Map 19 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 6 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS) +Map 16 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 8 <- Union 9 (CONTAINS) +Map 18 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 17 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Union 4 - |<-Reducer 10 [CONTAINS] - | File Output Operator [FS_77] + Union 3 + |<-Reducer 2 [CONTAINS] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_45] + | Select Operator [SEL_21] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_118] + | Merge Join Operator [MERGEJOIN_120] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} - | | outputColumnNames:["_col0","_col3"] - | |<-Reducer 9 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_41] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_17] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_104] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Union 5 [SIMPLE_EDGE] + | |<-Map 4 [CONTAINS] + | | Reduce Output Operator [RS_19] | | key expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 
(type: string), _col3 (type: string) - | | Merge Join Operator [MERGEJOIN_115] + | | Map Join Operator [MAPJOIN_119] | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | | outputColumnNames:["_col0","_col1","_col3"] - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 11 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_38] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) + | | | keys:{"Map 4":"_col0 (type: string)","Map 7":"_col1 (type: string)"} + | | | outputColumnNames:["_col1"] + | | |<-Map 7 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_13] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) | | | sort order:+ - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_25] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_9] | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_104] - | | | predicate:key is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_24] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 8 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_36] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_107] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_8] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | | Reduce Output Operator [RS_125] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Please refer to the previous Select Operator [SEL_9] + | | |<-Select Operator [SEL_3] + | | outputColumnNames:["_col0"] + | | Filter Operator [FIL_105] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_2] + | | alias:x + | |<-Map 6 [CONTAINS] + | Reduce Output Operator [RS_19] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Map Join Operator [MAPJOIN_119] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 7":"_col1 (type: string)","Map 6":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<- Please refer to the previous Map 7 [BROADCAST_EDGE] + | |<-Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_106] + | predicate:value is not null (type: boolean) + | TableScan [TS_4] + | alias:y + |<-Reducer 11 [CONTAINS] + | File Output Operator [FS_76] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator 
[SEL_45] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_122] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col1","_col4"] + | |<-Map 15 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_43] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | | value expressions:_col1 (type: string) - | | Select Operator [SEL_23] + | | Select Operator [SEL_34] | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_112] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_33] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_41] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_121] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE + | |<-Map 14 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_38] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_103] - | | predicate:(key is not null and value is not null) (type: boolean) + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_32] + | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_22] + | | Filter Operator [FIL_111] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_31] | | alias:y | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Union 13 [SIMPLE_EDGE] + | |<-Union 9 [SIMPLE_EDGE] | |<-Map 12 [CONTAINS] - | | Reduce Output Operator [RS_43] + | | Reduce Output Operator [RS_36] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Select Operator [SEL_27] + | | Select Operator [SEL_25] | | outputColumnNames:["_col0"] - | | Filter Operator [FIL_105] + | | Filter Operator [FIL_109] | | predicate:value is not null (type: boolean) - | | TableScan [TS_26] - | | alias:x - | |<-Map 14 [CONTAINS] - | | Reduce Output Operator [RS_43] + | | TableScan [TS_24] + | | alias:y + | |<-Map 13 [CONTAINS] + | | Reduce Output Operator [RS_36] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ | | Select Operator [SEL_29] | | outputColumnNames:["_col0"] - | | Filter Operator [FIL_106] + | | Filter Operator [FIL_110] | | predicate:value is not null (type: boolean) | | TableScan 
[TS_28] | | alias:y - | |<-Map 15 [CONTAINS] - | Reduce Output Operator [RS_43] + | |<-Map 8 [CONTAINS] + | Reduce Output Operator [RS_36] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Select Operator [SEL_33] + | Select Operator [SEL_23] | outputColumnNames:["_col0"] - | Filter Operator [FIL_107] + | Filter Operator [FIL_108] | predicate:value is not null (type: boolean) - | TableScan [TS_32] - | alias:y + | TableScan [TS_22] + | alias:x |<-Map 19 [CONTAINS] - | File Output Operator [FS_77] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] + | Select Operator [SEL_74] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] + | Map Join Operator [MAPJOIN_124] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 19":"_col0 (type: string)"} + | | keys:{"Map 21":"_col0 (type: string)","Map 19":"_col1 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<-Map 21 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_73] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) + | | Reduce Output Operator [RS_72] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Map Join Operator [MAPJOIN_116] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 21":"_col0 (type: string)","Map 20":"_col0 (type: string)"} - | | | outputColumnNames:["_col0","_col1","_col3"] - | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 20 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_65] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_61] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_112] - | | | predicate:(key is not null and value is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_60] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_63] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_63] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_113] + | | Filter Operator [FIL_118] | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_62] | | alias:x | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_122] + | | Reduce Output Operator [RS_131] + | | key expressions:_col0 
(type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Please refer to the previous Select Operator [SEL_63] + | | Reduce Output Operator [RS_132] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Please refer to the previous Select Operator [SEL_63] + | | Reduce Output Operator [RS_133] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Please refer to the previous Select Operator [SEL_63] + | |<-Map Join Operator [MAPJOIN_123] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 20":"_col1 (type: string)","Map 19":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<-Map 20 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_67] | | key expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_116] - | | Reduce Output Operator [RS_123] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_61] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_117] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_60] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | Reduce Output Operator [RS_127] | | key expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_116] - | | Reduce Output Operator [RS_124] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_61] + | | Reduce Output Operator [RS_128] | | key expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_116] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_61] + | | Reduce Output Operator [RS_129] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data 
size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_61] | |<-Select Operator [SEL_58] | outputColumnNames:["_col0"] - | Filter Operator [FIL_111] + | Filter Operator [FIL_116] | predicate:value is not null (type: boolean) | TableScan [TS_57] | alias:y |<-Map 16 [CONTAINS] - | File Output Operator [FS_77] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] + | Select Operator [SEL_74] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] + | Map Join Operator [MAPJOIN_124] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 16":"_col0 (type: string)"} + | | keys:{"Map 21":"_col0 (type: string)","Map 16":"_col1 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Map Join Operator [MAPJOIN_123] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 20":"_col1 (type: string)","Map 16":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<- Please refer to the previous Map 20 [BROADCAST_EDGE] | |<-Select Operator [SEL_49] | outputColumnNames:["_col0"] - | Filter Operator [FIL_108] + | Filter Operator [FIL_113] | predicate:value is not null (type: boolean) | TableScan [TS_48] | alias:x |<-Map 18 [CONTAINS] - | File Output Operator [FS_77] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] + | Select Operator [SEL_74] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] + | Map Join Operator [MAPJOIN_124] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 18":"_col0 (type: string)"} + | | keys:{"Map 21":"_col0 (type: string)","Map 18":"_col1 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Map Join Operator [MAPJOIN_123] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 20":"_col1 (type: string)","Map 18":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<- Please refer to the previous Map 20 [BROADCAST_EDGE] | |<-Select Operator [SEL_55] | outputColumnNames:["_col0"] - | Filter Operator [FIL_110] + | Filter Operator [FIL_115] | predicate:value is not null (type: boolean) | TableScan [TS_54] | alias:y |<-Map 17 [CONTAINS] - | File Output Operator [FS_77] - | compressed:false - | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] - | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 17":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col4"] - | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Select Operator [SEL_51] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_109] - | predicate:value 
is not null (type: boolean) - | TableScan [TS_50] - | alias:y - |<-Reducer 3 [CONTAINS] - File Output Operator [FS_77] + File Output Operator [FS_76] compressed:false table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select Operator [SEL_21] + Select Operator [SEL_74] outputColumnNames:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_117] + Map Join Operator [MAPJOIN_124] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col2","_col3"] - |<-Map 6 [SIMPLE_EDGE] - | Reduce Output Operator [RS_19] - | key expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) - | sort order:+ - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string), _col2 (type: string) - | Map Join Operator [MAPJOIN_114] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 7":"_col0 (type: string)","Map 6":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Map 7 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_13] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_9] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_102] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_8] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_7] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_101] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_6] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Union 2 [SIMPLE_EDGE] - |<-Map 1 [CONTAINS] - | Reduce Output Operator [RS_17] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Select Operator [SEL_1] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_99] - | predicate:value is not null (type: boolean) - | TableScan [TS_0] - | alias:x - |<-Map 5 [CONTAINS] - Reduce Output Operator [RS_17] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Select Operator [SEL_3] + | keys:{"Map 21":"_col0 (type: string)","Map 17":"_col1 (type: string)"} + | outputColumnNames:["_col1","_col4"] + |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + |<-Map Join Operator [MAPJOIN_123] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 20":"_col1 (type: string)","Map 17":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + |<- Please refer to the previous Map 20 [BROADCAST_EDGE] + |<-Select Operator [SEL_51] outputColumnNames:["_col0"] 
- Filter Operator [FIL_100] + Filter Operator [FIL_114] predicate:value is not null (type: boolean) - TableScan [TS_2] + TableScan [TS_50] alias:y PREHOOK: query: explain SELECT x.key, y.value @@ -3114,56 +3160,54 @@ Plan optimized by CBO. Vertex dependency in root stage +Map 12 <- Union 13 (CONTAINS) Map 30 <- Union 24 (CONTAINS) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) Map 23 <- Union 24 (CONTAINS) Map 32 <- Union 28 (CONTAINS) Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) -Map 22 <- Union 19 (CONTAINS) Map 31 <- Union 26 (CONTAINS) -Map 21 <- Union 17 (CONTAINS) -Map 34 <- Map 33 (BROADCAST_EDGE) -Map 1 <- Union 2 (CONTAINS) -Reducer 20 <- Union 19 (SIMPLE_EDGE) -Map 10 <- Map 11 (BROADCAST_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Map 16 <- Union 17 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) +Map 20 <- Union 15 (CONTAINS) +Map 10 <- Union 8 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 19 <- Union 13 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 9 <- Map 11 (BROADCAST_EDGE), Union 8 (SIMPLE_EDGE) +Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS) -Reducer 29 <- Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Map 9 <- Union 2 (CONTAINS) +Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 29 <- Map 33 (BROADCAST_EDGE), Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 16 <- Union 15 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 7 <- Union 8 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_121] + Reducer 6 + File Output Operator [FS_120] compressed:false - Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Group By Operator [GBY_119] + Group By Operator [GBY_118] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE - |<-Union 7 [SIMPLE_EDGE] - |<-Reducer 6 [CONTAINS] - | Reduce Output Operator [RS_118] + | Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 4 [CONTAINS] + | Reduce Output Operator [RS_117] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_117] + | Group By Operator [GBY_116] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] | Group By Operator [GBY_67] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | |<-Union 5 [SIMPLE_EDGE] - | |<-Reducer 14 [CONTAINS] + | |<-Union 3 [SIMPLE_EDGE] + | |<-Reducer 
18 [CONTAINS] | | Reduce Output Operator [RS_66] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -3173,123 +3217,122 @@ | | outputColumnNames:["_col0","_col1"] | | Select Operator [SEL_61] | | outputColumnNames:["_col0","_col1"] - | | Merge Join Operator [MERGEJOIN_162] + | | Merge Join Operator [MERGEJOIN_166] | | | condition map:[{"":"Inner Join 0 to 1"}] | | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} - | | | outputColumnNames:["_col0","_col3"] - | | |<-Reducer 13 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_57] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string), _col3 (type: string) - | | | Merge Join Operator [MERGEJOIN_159] - | | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | | | outputColumnNames:["_col0","_col1","_col3"] - | | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | | |<-Map 12 [SIMPLE_EDGE] - | | | | Reduce Output Operator [RS_52] - | | | | key expressions:_col0 (type: string) - | | | | Map-reduce partition columns:_col0 (type: string) - | | | | sort order:+ - | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | | value expressions:_col1 (type: string) - | | | | Select Operator [SEL_28] - | | | | outputColumnNames:["_col0","_col1"] - | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | | Filter Operator [FIL_147] - | | | | predicate:(key is not null and value is not null) (type: boolean) - | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | | TableScan [TS_27] - | | | | alias:y - | | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | | |<-Map 15 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_54] + | | | outputColumnNames:["_col1","_col4"] + | | |<-Map 22 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_59] | | | key expressions:_col0 (type: string) | | | Map-reduce partition columns:_col0 (type: string) | | | sort order:+ | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_30] + | | | Select Operator [SEL_50] | | | outputColumnNames:["_col0","_col1"] | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_148] + | | | Filter Operator [FIL_156] | | | predicate:key is not null (type: boolean) | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_29] + | | | TableScan [TS_49] | | | alias:y | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Reducer 20 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_59] + | | |<-Reducer 17 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_57] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | Merge Join Operator [MERGEJOIN_165] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"_col1 (type: 
string)","0":"_col0 (type: string)"} + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 21 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_54] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_48] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_155] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_47] + | | | alias:y + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 16 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_52] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_50] + | | Select Operator [SEL_46] | | outputColumnNames:["_col0"] | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_49] + | | Group By Operator [GBY_45] | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | | outputColumnNames:["_col0","_col1"] | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 19 [SIMPLE_EDGE] - | | |<-Map 22 [CONTAINS] - | | | Reduce Output Operator [RS_48] + | | |<-Union 15 [SIMPLE_EDGE] + | | |<-Reducer 14 [CONTAINS] + | | | Reduce Output Operator [RS_44] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_47] + | | | Group By Operator [GBY_43] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_43] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_151] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_42] - | | | alias:y - | | |<-Reducer 18 [CONTAINS] - | | Reduce Output Operator [RS_48] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_47] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Group By Operator [GBY_40] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | |<-Union 17 [SIMPLE_EDGE] - | | |<-Map 21 [CONTAINS] - | | | Reduce Output Operator [RS_39] + | | | Group By Operator [GBY_36] + | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | |<-Union 13 [SIMPLE_EDGE] + | | | |<-Map 12 [CONTAINS] + | | | | Reduce Output Operator [RS_35] + | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_34] + | | | | keys:_col0 (type: string), _col1 (type: string) + | 
| | | outputColumnNames:["_col0","_col1"] + | | | | Select Operator [SEL_28] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Filter Operator [FIL_152] + | | | | predicate:value is not null (type: boolean) + | | | | TableScan [TS_27] + | | | | alias:x + | | | |<-Map 19 [CONTAINS] + | | | Reduce Output Operator [RS_35] | | | key expressions:_col0 (type: string), _col1 (type: string) | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | | sort order:++ - | | | Group By Operator [GBY_38] + | | | Group By Operator [GBY_34] | | | keys:_col0 (type: string), _col1 (type: string) | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_34] + | | | Select Operator [SEL_30] | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_150] + | | | Filter Operator [FIL_153] | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_33] + | | | TableScan [TS_29] | | | alias:y - | | |<-Map 16 [CONTAINS] - | | Reduce Output Operator [RS_39] + | | |<-Map 20 [CONTAINS] + | | Reduce Output Operator [RS_44] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_38] + | | Group By Operator [GBY_43] | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_32] + | | Select Operator [SEL_39] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_149] + | | Filter Operator [FIL_154] | | predicate:value is not null (type: boolean) - | | TableScan [TS_31] - | | alias:x - | |<-Reducer 4 [CONTAINS] + | | TableScan [TS_38] + | | alias:y + | |<-Reducer 2 [CONTAINS] | Reduce Output Operator [RS_66] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -3299,140 +3342,139 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_26] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_161] + | Merge Join Operator [MERGEJOIN_164] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Map 10 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_24] - | | key expressions:_col3 (type: string) - | | Map-reduce partition columns:_col3 (type: string) + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_22] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string) - | | Map Join Operator [MAPJOIN_158] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 11":"_col0 (type: string)","Map 10":"_col0 (type: string)"} - | | | outputColumnNames:["_col1","_col2","_col3"] - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 11 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_18] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_14] - | | | 
outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_146] - | | | predicate:(key is not null and value is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_13] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_12] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_1] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_145] + | | Filter Operator [FIL_148] | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_11] + | | TableScan [TS_0] | | alias:y | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | |<-Reducer 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | Map Join Operator [MAPJOIN_163] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 11":"_col1 (type: string)","Reducer 9":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 11 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_14] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_151] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_13] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_12] | outputColumnNames:["_col0"] | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] + | Group By Operator [GBY_11] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] + | |<-Union 8 [SIMPLE_EDGE] + | |<-Map 10 [CONTAINS] + | | Reduce Output Operator [RS_10] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | | sort order:++ - | | Group By Operator [GBY_7] + | | Group By Operator [GBY_9] | | keys:_col0 (type: string), _col1 (type: 
string) | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] + | | Select Operator [SEL_5] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_143] + | | Filter Operator [FIL_150] | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 9 [CONTAINS] - | Reduce Output Operator [RS_8] + | | TableScan [TS_4] + | | alias:y + | |<-Map 7 [CONTAINS] + | Reduce Output Operator [RS_10] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_7] + | Group By Operator [GBY_9] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_3] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_144] + | Filter Operator [FIL_149] | predicate:value is not null (type: boolean) | TableScan [TS_2] - | alias:y + | alias:x |<-Reducer 29 [CONTAINS] - Reduce Output Operator [RS_118] + Reduce Output Operator [RS_117] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) sort order:++ - Group By Operator [GBY_117] + Group By Operator [GBY_116] keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Select Operator [SEL_113] + Select Operator [SEL_112] outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_163] + Map Join Operator [MAPJOIN_168] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 34":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"} + | keys:{"Map 34":"_col0 (type: string)","Reducer 29":"_col1 (type: string)"} | outputColumnNames:["_col1","_col4"] |<-Map 34 [BROADCAST_EDGE] - | Reduce Output Operator [RS_111] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) + | Reduce Output Operator [RS_110] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string), _col3 (type: string) - | Map Join Operator [MAPJOIN_160] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 34":"_col0 (type: string)","Map 33":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col1","_col3"] - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | |<-Map 33 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_103] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_99] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_156] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_98] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_101] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_101] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 13 Data size: 99 Basic 
stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_157] + | Filter Operator [FIL_162] | predicate:key is not null (type: boolean) | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_100] | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_167] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 33":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + |<-Map 33 [BROADCAST_EDGE] + | Reduce Output Operator [RS_105] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_99] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_161] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_98] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE |<-Select Operator [SEL_97] outputColumnNames:["_col0"] Group By Operator [GBY_96] @@ -3449,7 +3491,7 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_90] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_155] + | Filter Operator [FIL_160] | predicate:value is not null (type: boolean) | TableScan [TS_89] | alias:y @@ -3487,7 +3529,7 @@ | | outputColumnNames:["_col0","_col1"] | | Select Operator [SEL_72] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_153] + | | Filter Operator [FIL_158] | | predicate:value is not null (type: boolean) | | TableScan [TS_71] | | alias:y @@ -3501,7 +3543,7 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_70] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_152] + | Filter Operator [FIL_157] | predicate:value is not null (type: boolean) | TableScan [TS_69] | alias:x @@ -3515,7 +3557,7 @@ outputColumnNames:["_col0","_col1"] Select Operator [SEL_81] outputColumnNames:["_col0","_col1"] - Filter Operator [FIL_154] + Filter Operator [FIL_159] predicate:value is not null (type: boolean) TableScan [TS_80] alias:y Index: ql/src/test/results/clientpositive/tez/ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/ptf.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/tez/ptf.q.out (working copy) @@ -93,28 +93,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 
(type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -571,28 +571,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -748,21 +748,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -770,7 +770,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator 
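Every .q.out hunk in this part of the patch makes the same mechanical substitution: the synthetic window-function aliases _wcol0, _wcol1, ... that EXPLAIN previously printed are replaced by descriptive names of the form <function>_window_<position> (rank_window_0, dense_rank_window_1, sum_window_2, lag_window_2, and so on), so each alias now records which window function produced the column. A minimal sketch of a query that would exercise this path is given below; it assumes the standard Hive test table part (columns p_mfgr, p_name, p_size, p_retailprice) and is illustrative only, not the exact .q test being re-baselined here:

    -- Sketch only: before this patch the three window functions below appear in
    -- EXPLAIN output as _wcol0, _wcol1 and _wcol2; with the patch applied they
    -- are printed as rank_window_0, dense_rank_window_1 and sum_window_2.
    EXPLAIN
    SELECT p_mfgr, p_name, p_size,
           rank()       OVER (PARTITION BY p_mfgr ORDER BY p_name) AS r,
           dense_rank() OVER (PARTITION BY p_mfgr ORDER BY p_name) AS dr,
           sum(p_retailprice) OVER (PARTITION BY p_mfgr ORDER BY p_name
                                    ROWS BETWEEN UNBOUNDED PRECEDING
                                             AND CURRENT ROW) AS s1
    FROM part;

In the plans above, the frame PRECEDING(MAX)~FOLLOWING(MAX) is the unbounded frame used by the ranking functions, and PRECEDING(MAX)~ appears to correspond to the running frame (UNBOUNDED PRECEDING up to the current row) used by the cumulative sum.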
@@ -950,21 +950,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -972,7 +972,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1440,7 +1440,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1448,7 +1448,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1613,28 +1613,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1786,28 +1786,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window 
function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2019,28 +2019,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2202,20 +2202,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2414,33 +2414,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window 
function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -2448,7 +2448,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2767,14 +2767,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2986,28 +2986,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3038,22 +3038,22 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + 
outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) Reducer 5 Reduce Operator Tree: Select Operator @@ -3074,35 +3074,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3431,28 +3431,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3697,28 +3697,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank 
window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3939,28 +3939,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4218,28 +4218,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4479,28 +4479,28 @@ raw input shape: window functions: window function definition - 
alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4727,28 +4727,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/tez/cbo_join.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/cbo_join.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/tez/cbo_join.q.out (working copy) @@ -1,4 +1,5 @@ -PREHOOK: query: -- 4. Test Select + Join + TS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6,7 +7,8 @@ PREHOOK: Input: default@cbo_t2 PREHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: -- 4. Test Select + Join + TS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- 4. 
Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 @@ -122,46 +124,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL 1 1 1 @@ -522,6 +484,46 @@ 1 1 1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -632,8 +634,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -730,6 +730,8 @@ 1 1 1 1 1 1 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -744,8 +746,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -847,6 +847,8 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -861,10 +863,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -966,6 +964,10 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL +NULL NULL +NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5334,8 +5336,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -5870,6 +5870,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6430,8 +6432,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -6966,6 +6966,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: -- 5. 
Test Select + Join + FIL + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0) PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out (working copy) @@ -346,28 +346,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -768,7 +768,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -776,7 +776,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1297,28 +1297,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1630,21 +1630,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1652,7 +1652,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1999,21 +1999,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -2021,7 +2021,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2957,7 +2957,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -2965,7 +2965,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3280,28 +3280,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank 
window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3604,28 +3604,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3998,28 +3998,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4352,20 +4352,20 @@ raw input shape: window functions: window function definition - 
alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4866,33 +4866,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -4900,7 +4900,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5454,14 +5454,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5728,7 +5728,7 @@ TOK_DISTRIBUTEBY TOK_TABLE_OR_COL p_mfgr - TOK_SORTBY + TOK_SORTBY TOK_TABSORTCOLNAMEASC TOK_TABLE_OR_COL p_mfgr @@ -5939,28 +5939,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6010,15 +6010,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) @@ -6026,7 +6026,7 @@ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: true Reducer 5 Needs Tagging: false @@ -6049,35 +6049,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 
(type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6616,28 +6616,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7066,28 +7066,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7477,28 +7477,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: 
string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7939,28 +7939,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8410,28 +8410,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8840,28 +8840,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 
(type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/tez/subquery_in.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/subquery_in.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/tez/subquery_in.q.out (working copy) @@ -335,7 +335,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -343,7 +343,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -507,7 +507,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -515,7 +515,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/annotate_stats_union.q.out =================================================================== --- ql/src/test/results/clientpositive/annotate_stats_union.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out (working copy) @@ -76,11 +76,7 @@ Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 16 rawDataSize: 1376 @@ -90,45 +86,23 @@ explain select * from (select state from loc_orc union all select state from loc_orc) tmp POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Union - Statistics: Num 
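
Note: the ptf_streaming.q.out and subquery_in.q.out hunks above are all the same mechanical rename: windowing expressions the old planner called _wcol0, _wcol1, ... now get function-derived aliases (rank_window_0, dense_rank_window_1, sum_window_2, ...), and every downstream Select/Filter expression is updated to match. As a rough sketch (not the exact test queries), plans of this shape come from HiveQL such as the following, assuming the standard part test table (p_mfgr, p_name, p_size, p_retailprice):

    -- rank/dense_rank/sum over a partition; the plan aliases become
    -- rank_window_0, dense_rank_window_1, sum_window_2
    SELECT p_mfgr, p_name, p_size,
           rank()       OVER (PARTITION BY p_mfgr ORDER BY p_name) AS r,
           dense_rank() OVER (PARTITION BY p_mfgr ORDER BY p_name) AS dr,
           -- "PRECEDING(MAX)~FOLLOWING(MAX)" in the plan output corresponds to
           -- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING; a bare
           -- "PRECEDING(MAX)~" appears to be the default frame ending at the
           -- current row
           sum(p_retailprice) OVER (PARTITION BY p_mfgr ORDER BY p_name) AS s
    FROM part;

In subquery_in.q.out the same alias also surfaces inside the rewritten IN-subquery filter, e.g. predicate: (rank_window_0 <= 2) for a "top two per group" subquery.
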
rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - Union - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -152,11 +126,7 @@ Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 16 rawDataSize: 1592 @@ -166,45 +136,23 @@ explain select * from (select * from loc_orc union all select * from loc_orc) tmp POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Union - Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - Union - Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -318,32 +266,24 @@ Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Union - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Union - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -372,31 +312,23 @@ TableScan alias: loc_staging Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Union - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out (working copy) @@ -119,17 +119,21 @@ Map Operator Tree: TableScan alias: src - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + key expressions: $f0 (type: string), $f1 (type: string), $f2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: $f0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -227,27 +231,125 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + 
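
Note: the annotate_stats_union.q.out hunks record a statistics change rather than a rename: each TableScan loses its Select Operator child, and the annotated stats drop from concrete values (Num rows: 8 Data size: 796 ... COMPLETE) to Num rows: 0 Data size: 0 Basic stats: NONE. The affected queries are simple UNION ALL self-reads, taken from the test itself:

    -- union of two scans of the same ORC table; the old golden file asserted
    -- Num rows: 16 on the Union operator, the new one asserts no basic stats
    EXPLAIN
    SELECT * FROM (
      SELECT state FROM loc_orc
      UNION ALL
      SELECT state FROM loc_orc
    ) tmp;
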
name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + /srcpart/ds=2008-04-09/hr=11 [src] + /srcpart/ds=2008-04-09/hr=12 [src] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(DISTINCT KEY._col1:2._col0) keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2, $f3, $f4 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string), $f3 (type: double), $f4 (type: bigint) + outputColumnNames: _o__c0, _o__c1, _o__c2, _o__c3, _o__c4 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string), UDFToInteger(_o__c3) (type: int), UDFToInteger(_o__c4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: 
org.apache.hadoop.mapred.TextInputFormat @@ -304,6 +406,8 @@ PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 POSTHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 @@ -314,11 +418,13 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.null, ] -POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/groupby_sort_test_1.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_test_1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_sort_test_1.q.out (working copy) @@ -59,37 +59,37 @@ Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + expressions: UDFToInteger($f0) (type: int), UDFToInteger($f1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + 
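
Note: groupby_ppr_multi_distinct.q.out shows two related changes. First, internal column names switch from the positional _colN scheme to Calcite-style field names ($f0, $f1, ...) with _o__cN output aliases. Second, compile-time partition pruning no longer applies: the predicate ds = '2008-04-08' now appears as a runtime Filter Operator, both ds=2008-04-09 partitions join the PREHOOK/POSTHOOK input lists, and the scan-side row estimate doubles from 1000 to 2000. A sketch of the affected shape (the real test is an INSERT OVERWRITE with several DISTINCT aggregates over srcpart):

    -- partition predicate previously pruned at compile time, now evaluated
    -- by a Filter Operator over all four srcpart partitions
    EXPLAIN EXTENDED
    SELECT substr(key, 1, 1), count(DISTINCT substr(value, 5))
    FROM srcpart
    WHERE ds = '2008-04-08'
    GROUP BY substr(key, 1, 1);
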
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: ql/src/test/results/clientpositive/alter_char2.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_char2.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/alter_char2.q.out (working copy) @@ -37,7 +37,6 @@ POSTHOOK: Input: default@alter_char2 POSTHOOK: Input: default@alter_char2@hr=1 #### A masked pattern was here #### -val_238 7 PREHOOK: query: alter table alter_char2 change column c1 c1 char(10) PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: default@alter_char2 @@ -56,7 +55,6 @@ POSTHOOK: Input: default@alter_char2 POSTHOOK: Input: default@alter_char2@hr=1 #### A masked pattern was here #### -1 val_238 7 PREHOOK: query: insert overwrite table alter_char2 partition (hr=2) select key from src limit 1 PREHOOK: type: QUERY @@ -72,21 +70,23 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alter_char2 PREHOOK: Input: default@alter_char2@hr=1 +PREHOOK: Input: default@alter_char2@hr=2 #### A masked pattern was here #### POSTHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_char2 POSTHOOK: Input: default@alter_char2@hr=1 +POSTHOOK: Input: default@alter_char2@hr=2 #### A masked pattern was here #### -1 val_238 7 PREHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 2 PREHOOK: type: QUERY PREHOOK: Input: default@alter_char2 +PREHOOK: Input: default@alter_char2@hr=1 PREHOOK: Input: default@alter_char2@hr=2 #### A masked pattern was here #### POSTHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_char2 +POSTHOOK: Input: default@alter_char2@hr=1 POSTHOOK: Input: default@alter_char2@hr=2 #### A masked pattern was here #### -2 238 3 Index: ql/src/test/results/clientpositive/analyze_table_null_partition.q.out =================================================================== --- ql/src/test/results/clientpositive/analyze_table_null_partition.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/analyze_table_null_partition.q.out (working copy) @@ -37,38 +37,14 @@ POSTHOOK: query: FROM test1 INSERT OVERWRITE TABLE test2 PARTITION(age) SELECT test1.name, test1.age POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 -POSTHOOK: Output: default@test2@age=15 -POSTHOOK: Output: default@test2@age=30 -POSTHOOK: Output: default@test2@age=40 -POSTHOOK: Output: default@test2@age=__HIVE_DEFAULT_PARTITION__ -POSTHOOK: Lineage: test2 PARTITION(age=15).name SIMPLE [(test1)test1.FieldSchema(name:name, type:string, comment:null), ] -POSTHOOK: Lineage: test2 PARTITION(age=30).name SIMPLE [(test1)test1.FieldSchema(name:name, type:string, comment:null), ] -POSTHOOK: Lineage: test2 PARTITION(age=40).name SIMPLE [(test1)test1.FieldSchema(name:name, type:string, comment:null), ] -POSTHOOK: Lineage: test2 PARTITION(age=__HIVE_DEFAULT_PARTITION__).name SIMPLE [(test1)test1.FieldSchema(name:name, type:string, comment:null), ] PREHOOK: query: ANALYZE TABLE test2 PARTITION(age) 
COMPUTE STATISTICS PREHOOK: type: QUERY PREHOOK: Input: default@test2 -PREHOOK: Input: default@test2@age=15 -PREHOOK: Input: default@test2@age=30 -PREHOOK: Input: default@test2@age=40 -PREHOOK: Input: default@test2@age=__HIVE_DEFAULT_PARTITION__ PREHOOK: Output: default@test2 -PREHOOK: Output: default@test2@age=15 -PREHOOK: Output: default@test2@age=30 -PREHOOK: Output: default@test2@age=40 -PREHOOK: Output: default@test2@age=__HIVE_DEFAULT_PARTITION__ POSTHOOK: query: ANALYZE TABLE test2 PARTITION(age) COMPUTE STATISTICS POSTHOOK: type: QUERY POSTHOOK: Input: default@test2 -POSTHOOK: Input: default@test2@age=15 -POSTHOOK: Input: default@test2@age=30 -POSTHOOK: Input: default@test2@age=40 -POSTHOOK: Input: default@test2@age=__HIVE_DEFAULT_PARTITION__ POSTHOOK: Output: default@test2 -POSTHOOK: Output: default@test2@age=15 -POSTHOOK: Output: default@test2@age=30 -POSTHOOK: Output: default@test2@age=40 -POSTHOOK: Output: default@test2@age=__HIVE_DEFAULT_PARTITION__ PREHOOK: query: -- To show stats. It doesn't show due to a bug. DESC EXTENDED test2 PREHOOK: type: DESCTABLE @@ -115,188 +91,15 @@ Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - age 15 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - numFiles 1 - numRows 1 - partition_columns age - partition_columns.types int - rawDataSize 3 - serialization.ddl struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 4 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - partition_columns age - partition_columns.types int - serialization.ddl struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test2 - name: default.test2 - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - age 30 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - numFiles 1 - numRows 1 - partition_columns age - partition_columns.types int - rawDataSize 0 - serialization.ddl struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - partition_columns age - partition_columns.types int - serialization.ddl 
struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test2 - name: default.test2 - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - age 40 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - numFiles 1 - numRows 1 - partition_columns age - partition_columns.types int - rawDataSize 4 - serialization.ddl struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - partition_columns age - partition_columns.types int - serialization.ddl struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test2 - name: default.test2 - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - age __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - numFiles 1 - numRows 2 - partition_columns age - partition_columns.types int - rawDataSize 4 - serialization.ddl struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 6 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns name - columns.comments - columns.types string -#### A masked pattern was here #### - name default.test2 - partition_columns age - partition_columns.types int - serialization.ddl struct test2 { string name} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test2 - name: default.test2 Processor Tree: TableScan alias: test2 - Statistics: Num rows: 5 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE GatherStats: false Select Operator expressions: name (type: string), age (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 111 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: name, age + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: DROP TABLE test1 Index: ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out 
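
Note: the alter_char2.q.out and analyze_table_null_partition.q.out hunks above are metadata-level diffs of the same pruning change: partition-filtered selects now list sibling partitions as inputs (the hr = 2 query also reads hr=1 and vice versa), the dynamic-partition POSTHOOK Output/Lineage lines and per-partition Partition Descriptions are dropped, and the expected result rows disappear from the golden files. The query whose input list changed is simply:

    -- with compile-time pruning, only hr=2 appeared under PREHOOK: Input;
    -- the new golden file lists both hr=1 and hr=2
    SELECT hr, c1, length(c1)
    FROM alter_char2
    WHERE hr = 2;
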
=================================================================== --- ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out (working copy) @@ -32,11 +32,11 @@ Processor Tree: TableScan alias: array_table - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: index (type: int), array[index] (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: index, _o__c1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: select index, `array`[index] from array_table @@ -47,10 +47,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@array_table #### A masked pattern was here #### -1 second -2 third -2 third -0 first PREHOOK: query: create table map_table (data map<int,string>, key int ) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -85,11 +81,11 @@ Processor Tree: TableScan alias: map_table - Statistics: Num rows: 4 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), data[key] (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 84 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: key, _o__c1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: select key, data[key] from map_table @@ -100,7 +96,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@map_table #### A masked pattern was here #### -2 two -3 three -3 three -1 one Index: ql/src/test/results/clientpositive/groupby_cube1.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_cube1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_cube1.q.out (working copy) @@ -36,32 +36,31 @@ alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), val (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string),
KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: $f0 (type: string), $f1 (type: string), $f2 (type: bigint) + outputColumnNames: key, val, _o__c2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -85,24 +84,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 11 1 -1 NULL 1 -2 12 1 -2 NULL 1 -3 13 1 -3 NULL 1 -7 17 1 -7 NULL 1 -8 18 1 -8 28 1 -8 NULL 2 -NULL 11 1 -NULL 12 1 -NULL 13 1 -NULL 17 1 -NULL 18 1 -NULL 28 1 -NULL NULL 6 PREHOOK: query: EXPLAIN SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube PREHOOK: type: QUERY @@ -121,19 +102,19 @@ alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), val (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: @@ -141,11 +122,11 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: $f0, $f1, GROUPING__ID, $f2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: $f0 (type: string), $f1 (type: string), GROUPING__ID (type: int), $f2 (type: bigint) + outputColumnNames: key, val, grouping__id, _o__c3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -169,24 +150,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 11 3 1 -1 NULL 1 1 -2 12 3 1 -2 NULL 1 1 -3 13 3 1 -3 NULL 1 1 -7 17 3 1 -7 NULL 1 1 -8 18 3 1 -8 28 3 1 -8 NULL 1 2 -NULL 11 2 1 -NULL 12 2 1 -NULL 13 2 1 -NULL 17 2 1 -NULL 18 2 1 -NULL 28 2 1 -NULL NULL 0 6 PREHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with cube PREHOOK: type: QUERY @@ -204,32 +167,27 @@ TableScan alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: 
_col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), '0' (type: string), _col1 (type: string) + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col2 + outputColumnNames: key, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: key (type: string), $f1 (type: bigint) + outputColumnNames: key, _o__c1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -253,12 +211,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -NULL 6 PREHOOK: query: EXPLAIN SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube PREHOOK: type: QUERY @@ -278,12 +230,12 @@ alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), val (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -320,14 +272,13 @@ Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: $f0 (type: string), $f1 (type: string), $f2 (type: bigint) + outputColumnNames: key, val, _o__c2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -351,24 +302,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 11 1 -1 NULL 1 -2 12 1 -2 NULL 1 -3 13 1 -3 NULL 1 -7 17 1 -7 NULL 1 -8 18 1 -8 28 1 -8 NULL 2 -NULL 11 1 -NULL 12 1 -NULL 13 1 -NULL 17 1 -NULL 18 1 -NULL 28 1 -NULL NULL 6 PREHOOK: query: EXPLAIN SELECT key, 
count(distinct val) FROM T1 GROUP BY key with cube PREHOOK: type: QUERY @@ -386,13 +319,9 @@ TableScan alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), '0' (type: string), _col1 (type: string) + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -404,14 +333,13 @@ Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col2 + outputColumnNames: key, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: key (type: string), $f1 (type: bigint) + outputColumnNames: key, _o__c1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -435,12 +363,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -NULL 6 PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -549,9 +471,13 @@ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -617,9 +543,13 @@ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Index: ql/src/test/results/clientpositive/groupby5_map_skew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby5_map_skew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ 
ql/src/test/results/clientpositive/groupby5_map_skew.q.out (working copy) @@ -24,12 +24,8 @@ TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0) + aggregations: sum(key) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -41,15 +37,15 @@ Group By Operator aggregations: sum(VALUE._col0) mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int) + expressions: UDFToInteger($f0) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -86,4 +82,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091 +NULL Index: ql/src/test/results/clientpositive/quotedid_basic.q.out =================================================================== --- ql/src/test/results/clientpositive/quotedid_basic.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/quotedid_basic.q.out (working copy) @@ -211,7 +211,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -219,7 +219,7 @@ isPivotResult: true Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -315,7 +315,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -323,7 +323,7 @@ isPivotResult: true Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/vectorized_ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/vectorized_ptf.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ 
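
Note: groupby5_map_skew.q.out is the one hunk in this stretch that changes an actual query result, not just a plan: the expected value of the skewed map-side sum(key) over src flips from 130091 to NULL, alongside the $f0 rename and a doubled data size on the final Group By. Approximately (the real test INSERTs the result into dest1; the settings are the ones the _map_skew test family is assumed to use):

    SET hive.map.aggr=true;
    SET hive.groupby.skewindata=true;
    -- old golden file expected 130091 here; the new one expects NULL
    SELECT CAST(sum(key) AS INT) FROM src;
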
ql/src/test/results/clientpositive/vectorized_ptf.q.out (working copy) @@ -387,28 +387,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -844,7 +844,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -852,7 +852,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1408,28 +1408,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1782,21 +1782,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank 
window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1804,7 +1804,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2238,21 +2238,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -2260,7 +2260,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3303,7 +3303,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -3311,7 +3311,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3667,28 +3667,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num 
rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4032,28 +4032,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4514,28 +4514,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4909,20 +4909,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 
(type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5503,33 +5503,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -5537,7 +5537,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6172,14 +6172,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6706,28 +6706,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), 
_wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6841,15 +6841,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -6860,7 +6860,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col5,_wcol0 + columns _col1,_col2,_col5,sum_window_0 columns.types string,string,int,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6880,7 +6880,7 @@ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6891,7 +6891,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col5,_wcol0 + columns _col1,_col2,_col5,sum_window_0 columns.types string,string,int,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6900,7 +6900,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col5,_wcol0 + columns _col1,_col2,_col5,sum_window_0 columns.types string,string,int,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6927,35 +6927,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic 
stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7553,28 +7553,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8138,28 +8138,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8637,28 +8637,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -9234,28 +9234,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -9793,28 +9793,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -10311,28 +10311,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function 
definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/groupby_ppr.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_ppr.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_ppr.q.out (working copy) @@ -106,17 +106,21 @@ Map Operator Tree: TableScan alias: src - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: $f0 (type: string), $f1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: $f0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -214,27 +218,125 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + /srcpart/ds=2008-04-09/hr=11 [src] + /srcpart/ds=2008-04-09/hr=12 [src] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) + expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string) + outputColumnNames: _o__c0, _o__c1, _o__c2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -291,6 +393,8 @@ PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 POSTHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 @@ -301,6 +405,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/groupby3.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby3.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby3.q.out (working copy) @@ -47,12 +47,12 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: $f0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator @@ -79,15 +79,15 @@ Group By Operator aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: 
NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: ql/src/test/results/clientpositive/correlationoptimizer12.q.out =================================================================== --- ql/src/test/results/clientpositive/correlationoptimizer12.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/correlationoptimizer12.q.out (working copy) @@ -52,7 +52,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col1 name: count window function: GenericUDAFCountEvaluator @@ -62,7 +62,7 @@ predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _wcol0 (type: bigint) + expressions: _col0 (type: string), count_window_0 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -137,7 +137,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col1 name: count window function: GenericUDAFCountEvaluator @@ -147,7 +147,7 @@ predicate: _col0 is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _wcol0 (type: bigint) + expressions: _col0 (type: string), count_window_0 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/groupby1_map_skew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_map_skew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby1_map_skew.q.out (working copy) @@ -31,11 +31,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) + aggregations: sum($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -74,10 +74,10 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + expressions: UDFToInteger($f0) (type: int), $f1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -120,312 +120,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 -105 
105.0 -11 11.0 -111 111.0 -113 226.0 -114 114.0 -116 116.0 -118 236.0 -119 357.0 -12 24.0 -120 240.0 -125 250.0 -126 126.0 -128 384.0 -129 258.0 -131 131.0 -133 133.0 -134 268.0 -136 136.0 -137 274.0 -138 552.0 -143 143.0 -145 145.0 -146 292.0 -149 298.0 -15 30.0 -150 150.0 -152 304.0 -153 153.0 -155 155.0 -156 156.0 -157 157.0 -158 158.0 -160 160.0 -162 162.0 -163 163.0 -164 328.0 -165 330.0 -166 166.0 -167 501.0 -168 168.0 -169 676.0 -17 17.0 -170 170.0 -172 344.0 -174 348.0 -175 350.0 -176 352.0 -177 177.0 -178 178.0 -179 358.0 -18 36.0 -180 180.0 -181 181.0 -183 183.0 -186 186.0 -187 561.0 -189 189.0 -19 19.0 -190 190.0 -191 382.0 -192 192.0 -193 579.0 -194 194.0 -195 390.0 -196 196.0 -197 394.0 -199 597.0 -2 2.0 -20 20.0 -200 400.0 -201 201.0 -202 202.0 -203 406.0 -205 410.0 -207 414.0 -208 624.0 -209 418.0 -213 426.0 -214 214.0 -216 432.0 -217 434.0 -218 218.0 -219 438.0 -221 442.0 -222 222.0 -223 446.0 -224 448.0 -226 226.0 -228 228.0 -229 458.0 -230 1150.0 -233 466.0 -235 235.0 -237 474.0 -238 476.0 -239 478.0 -24 48.0 -241 241.0 -242 484.0 -244 244.0 -247 247.0 -248 248.0 -249 249.0 -252 252.0 -255 510.0 -256 512.0 -257 257.0 -258 258.0 -26 52.0 -260 260.0 -262 262.0 -263 263.0 -265 530.0 -266 266.0 -27 27.0 -272 544.0 -273 819.0 -274 274.0 -275 275.0 -277 1108.0 -278 556.0 -28 28.0 -280 560.0 -281 562.0 -282 564.0 -283 283.0 -284 284.0 -285 285.0 -286 286.0 -287 287.0 -288 576.0 -289 289.0 -291 291.0 -292 292.0 -296 296.0 -298 894.0 -30 30.0 -302 302.0 -305 305.0 -306 306.0 -307 614.0 -308 308.0 -309 618.0 -310 310.0 -311 933.0 -315 315.0 -316 948.0 -317 634.0 -318 954.0 -321 642.0 -322 644.0 -323 323.0 -325 650.0 -327 981.0 -33 33.0 -331 662.0 -332 332.0 -333 666.0 -335 335.0 -336 336.0 -338 338.0 -339 339.0 -34 34.0 -341 341.0 -342 684.0 -344 688.0 -345 345.0 -348 1740.0 -35 105.0 -351 351.0 -353 706.0 -356 356.0 -360 360.0 -362 362.0 -364 364.0 -365 365.0 -366 366.0 -367 734.0 -368 368.0 -369 1107.0 -37 74.0 -373 373.0 -374 374.0 -375 375.0 -377 377.0 -378 378.0 -379 379.0 -382 764.0 -384 1152.0 -386 386.0 -389 389.0 -392 392.0 -393 393.0 -394 394.0 -395 790.0 -396 1188.0 -397 794.0 -399 798.0 -4 4.0 -400 400.0 -401 2005.0 -402 402.0 -403 1209.0 -404 808.0 -406 1624.0 -407 407.0 -409 1227.0 -41 41.0 -411 411.0 -413 826.0 -414 828.0 -417 1251.0 -418 418.0 -419 419.0 -42 84.0 -421 421.0 -424 848.0 -427 427.0 -429 858.0 -43 43.0 -430 1290.0 -431 1293.0 -432 432.0 -435 435.0 -436 436.0 -437 437.0 -438 1314.0 -439 878.0 -44 44.0 -443 443.0 -444 444.0 -446 446.0 -448 448.0 -449 449.0 -452 452.0 -453 453.0 -454 1362.0 -455 455.0 -457 457.0 -458 916.0 -459 918.0 -460 460.0 -462 924.0 -463 926.0 -466 1398.0 -467 467.0 -468 1872.0 -469 2345.0 -47 47.0 -470 470.0 -472 472.0 -475 475.0 -477 477.0 -478 956.0 -479 479.0 -480 1440.0 -481 481.0 -482 482.0 -483 483.0 -484 484.0 -485 485.0 -487 487.0 -489 1956.0 -490 490.0 -491 491.0 -492 984.0 -493 493.0 -494 494.0 -495 495.0 -496 496.0 -497 497.0 -498 1494.0 -5 15.0 -51 102.0 -53 53.0 -54 54.0 -57 57.0 -58 116.0 -64 64.0 -65 65.0 -66 66.0 -67 134.0 -69 69.0 -70 210.0 -72 144.0 -74 74.0 -76 152.0 -77 77.0 -78 78.0 -8 8.0 -80 80.0 -82 82.0 -83 166.0 -84 168.0 -85 85.0 -86 86.0 -87 87.0 -9 9.0 -90 270.0 -92 92.0 -95 190.0 -96 96.0 -97 194.0 -98 196.0 Index: ql/src/test/results/clientpositive/join_alt_syntax.q.out =================================================================== --- ql/src/test/results/clientpositive/join_alt_syntax.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ 
ql/src/test/results/clientpositive/join_alt_syntax.q.out (working copy) @@ -359,13 +359,13 @@ and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -379,34 +379,36 @@ outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -418,89 +420,89 @@ Map Reduce Map Operator Tree: TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 14 Data 
size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) TableScan Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: 
_col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -519,54 +521,56 @@ and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data 
size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -578,89 +582,89 @@ Map Reduce Map Operator Tree: TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) TableScan Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/groupby6_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby6_map.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby6_map.q.out (working copy) @@ -32,10 +32,10 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(value, 5, 1) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -48,7 +48,7 @@ Group By Operator keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -91,13 +91,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 Index: ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out =================================================================== --- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out (working copy) @@ -89,12 +89,8 @@ TableScan alias: location Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + keys: state (type: string), country (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE @@ -107,7 +103,7 @@ Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: state, country Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -142,27 +138,22 @@ TableScan alias: location Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: state (type: string), country (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: 
Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: state, country Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true File Output Operator compressed: false Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE @@ -200,29 +191,25 @@ TableScan alias: location Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + keys: state (type: string), country (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: state, country + Statistics: Num rows: 5 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -255,29 +242,25 @@ TableScan alias: location Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: state (type: string), votes (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) + keys: state (type: string), votes (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 840 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 840 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: state, votes + Statistics: Num rows: 5 Data size: 420 Basic stats: COMPLETE Column stats: 
PARTIAL File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -308,30 +291,25 @@ TableScan alias: location Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: state (type: string), country (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 8256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 10120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 32 Data size: 8256 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 40 Data size: 10120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true + outputColumnNames: state, country + Statistics: Num rows: 40 Data size: 6720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 6720 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -362,29 +340,25 @@ TableScan alias: location Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + keys: state (type: string), country (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: 
Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: state, country + Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -415,30 +389,25 @@ TableScan alias: location Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + keys: state (type: string), country (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 20240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 80 Data size: 20240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true + outputColumnNames: state, country + Statistics: Num rows: 80 Data size: 13440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 13440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: ql/src/test/results/clientpositive/bucket1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucket1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/bucket1.q.out (working copy) @@ -48,16 +48,12 @@ alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Map-reduce partition columns: UDFToInteger(key) (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: string), _col1 (type: string) + value expressions: key (type: 
string), value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -111,9 +107,13 @@ Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 Index: ql/src/test/results/clientpositive/fileformat_mix.q.out =================================================================== --- ql/src/test/results/clientpositive/fileformat_mix.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/fileformat_mix.q.out (working copy) @@ -53,7 +53,7 @@ POSTHOOK: Input: default@fileformat_mix_test@ds=1 POSTHOOK: Input: default@fileformat_mix_test@ds=2 #### A masked pattern was here #### -500 +0 PREHOOK: query: select src from fileformat_mix_test PREHOOK: type: QUERY PREHOOK: Input: default@fileformat_mix_test @@ -66,503 +66,3 @@ POSTHOOK: Input: default@fileformat_mix_test@ds=1 POSTHOOK: Input: default@fileformat_mix_test@ds=2 #### A masked pattern was here #### -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -265 -193 -401 -150 -273 -224 -369 -66 -128 -213 -146 -406 -429 -374 -152 -469 -145 -495 -37 -327 -281 -277 -209 -15 -82 -403 -166 -417 -430 -252 -292 -219 -287 -153 -193 -338 -446 -459 -394 -237 -482 -174 -413 -494 -207 -199 -466 -208 -174 -399 -396 -247 -417 -489 -162 -377 -397 -309 -365 -266 -439 -342 -367 -325 -167 -195 -475 -17 -113 -155 -203 -339 -0 -455 -128 -311 -316 -57 -302 -205 -149 -438 -345 -129 -170 -20 -489 -157 -378 -221 -92 -111 -47 -72 -4 -280 -35 -427 -277 -208 -356 -399 -169 -382 -498 -125 -386 -437 -469 -192 -286 -187 -176 -54 -459 -51 -138 -103 -239 -213 -216 -430 -278 -176 -289 -221 -65 -318 -332 -311 -275 -137 -241 -83 -333 -180 -284 -12 -230 -181 -67 -260 -404 -384 -489 -353 -373 -272 -138 -217 -84 -348 -466 -58 -8 -411 -230 -208 -348 -24 -463 -431 -179 -172 -42 -129 -158 -119 -496 -0 -322 -197 -468 -393 -454 -100 -298 -199 -191 -418 -96 -26 -165 -327 -230 -205 -120 -131 -51 -404 -43 -436 -156 -469 -468 -308 -95 -196 -288 -481 -457 -98 -282 -197 -187 -318 -318 -409 -470 -137 -369 -316 -169 -413 -85 -77 -0 -490 -87 -364 -179 -118 -134 -395 -282 -138 -238 -419 -15 -118 -72 -90 -307 -19 -435 -10 -277 -273 -306 -224 -309 -389 -327 -242 -369 -392 -272 -331 -401 -242 -452 -177 -226 -5 -497 -402 -396 -317 -395 -58 -35 -336 -95 -11 -168 -34 -229 -233 -143 -472 -322 -498 -160 -195 -42 -321 -430 -119 -489 -458 -78 -76 -41 -223 -492 -149 -449 -218 -228 -138 -453 -30 -209 -64 -468 -76 -74 -342 -69 -230 -33 -368 -103 -296 -113 -216 -367 -344 -167 -274 -219 -239 -485 -116 -223 -256 -263 -70 -487 -480 -401 -288 -191 -5 -244 -438 -128 -467 -432 -202 -316 -229 -469 -463 -280 -2 -35 -283 -331 -235 -80 -44 -193 -321 -335 -104 -466 -366 -175 -403 -483 -53 -105 -257 -406 -409 -190 -406 -401 -114 -258 -90 -203 -262 -348 -424 -12 -396 -201 -217 -164 -431 -454 -478 -298 -125 -431 -164 -424 -187 -382 -5 -70 -397 -480 -291 -24 -351 -255 -104 -70 -163 -438 -119 -414 -200 -491 -237 -439 -360 -248 -479 -305 -417 -199 -444 -120 -429 -169 -443 -323 -325 -277 -230 -478 -178 -468 -310 -317 -333 
-493 -460 -207 -249 -265 -480 -83 -136 -353 -172 -214 -462 -233 -406 -133 -175 -189 -454 -375 -401 -421 -407 -384 -256 -26 -134 -67 -384 -379 -18 -462 -492 -100 -298 -9 -341 -498 -146 -458 -362 -186 -285 -348 -167 -18 -273 -183 -281 -344 -97 -469 -315 -84 -28 -37 -448 -152 -348 -307 -194 -414 -477 -222 -126 -90 -169 -403 -400 -200 -97 Index: ql/src/test/results/clientpositive/groupby3_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby3_map.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby3_map.q.out (working copy) @@ -46,11 +46,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) - keys: _col0 (type: string) + aggregations: sum($f0), avg($f0), avg(DISTINCT $f0), max($f0), min($f0), std($f0), stddev_samp($f0), variance($f0), var_samp($f0) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -63,15 +63,15 @@ Group By Operator aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -121,7 +121,7 @@ POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION 
[(src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -156,4 +156,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055 98.0 0.0 142.92680950752 143.06995106519 20428.07288 20469.010897796 Index: ql/src/test/results/clientpositive/groupby12.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby12.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby12.q.out (working copy) @@ -26,24 +26,24 @@ TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: count(KEY._col0), count(DISTINCT KEY._col1:0._col0) + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: key, $f1, $f2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col1) (type: int), _col2 (type: bigint) + expressions: $f1 (type: bigint), $f2 (type: bigint) + outputColumnNames: _o__c0, _o__c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_o__c0) (type: int), _o__c1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -78,7 +78,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/create_genericudaf.q.out =================================================================== --- ql/src/test/results/clientpositive/create_genericudaf.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/create_genericudaf.q.out (working copy) @@ -40,11 +40,11 @@ alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col1 + expressions: 1 (type: int), substr(value, 5) (type: string) + outputColumnNames: $f0, $f1 Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: test_avg(1), test_avg(_col1) + aggregations: test_avg($f0), test_avg($f1) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -56,7 +56,7 @@ Group By Operator aggregations: test_avg(VALUE._col0), test_avg(VALUE._col1) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -86,7 +86,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -1.0 260.182 +NULL NULL PREHOOK: query: DROP TEMPORARY FUNCTIOn test_avg PREHOOK: type: DROPFUNCTION PREHOOK: Output: test_avg Index: ql/src/test/results/clientpositive/decimal_udf2.q.out =================================================================== --- ql/src/test/results/clientpositive/decimal_udf2.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/decimal_udf2.q.out (working copy) @@ -48,7 +48,7 @@ Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _o__c0, _o__c1, _o__c2, _o__c3, _o__c4, _o__c5, _o__c6 Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -74,7 +74,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### -NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295 PREHOOK: query: EXPLAIN SELECT exp(key), ln(key), @@ -105,7 +104,7 @@ Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _o__c0, _o__c1, _o__c2, _o__c3, _o__c4, _o__c5, _o__c6, _o__c7 Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -137,7 +136,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### -22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@decimal_udf2 Index: ql/src/test/results/clientpositive/subquery_notin.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_notin.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/subquery_notin.q.out (working copy) @@ -346,7 +346,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -354,7 +354,7 @@ 
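-- Annotation (not part of the original patch): in the windowing golden files
-- (subquery_notin.q.out here, ptf.q.out below) the patch renames internal window-function
-- aliases from positional _wcolN to the descriptive <function>_window_<index> form
-- (rank_window_0, lag_window_0, sum_window_2, ...). The rank_window_0 alias and the
-- (rank_window_0 <= 2) filter in these plans come from a windowed subquery of roughly
-- this shape; a sketch, not the literal test query:
select p_mfgr, p_name, p_size
from (select p_mfgr, p_name, p_size,
             rank() over (partition by p_mfgr order by p_size) as r
      from part) a
where r <= 2;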
isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) + predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -501,7 +501,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -509,7 +509,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -628,7 +628,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -636,7 +636,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -775,7 +775,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -783,7 +783,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -950,7 +950,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -958,7 +958,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) @@ -1141,7 +1141,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1149,7 +1149,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/ptf.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ 
ql/src/test/results/clientpositive/ptf.q.out (working copy) @@ -97,28 +97,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -313,7 +313,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -321,7 +321,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -585,28 +585,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -766,21 +766,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: 
GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -788,7 +788,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -982,21 +982,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -1004,7 +1004,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1480,7 +1480,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1488,7 +1488,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1657,28 +1657,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 
Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1834,28 +1834,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2081,28 +2081,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2268,20 +2268,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 
(type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2492,33 +2492,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -2526,7 +2526,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2853,14 +2853,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3075,28 +3075,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: 
_col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3150,15 +3150,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3176,7 +3176,7 @@ sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -3196,35 +3196,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3551,28 +3551,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank 
window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3841,28 +3841,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4097,28 +4097,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4400,28 +4400,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + 
alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4675,28 +4675,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4937,28 +4937,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/fetch_aggregation.q.out 
===================================================================
--- ql/src/test/results/clientpositive/fetch_aggregation.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613)
+++ ql/src/test/results/clientpositive/fetch_aggregation.q.out (working copy)
@@ -15,12 +15,8 @@
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col0), sum(_col0), avg(_col0), min(_col0), max(_col0), std(_col0), variance(_col0)
+ aggregations: count(key), sum(key), avg(key), min(key), max(key), std(key), variance(key)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
@@ -38,8 +34,8 @@
Group By Operator
aggregations: count(_col0), sum(_col1), avg(_col2), min(_col3), max(_col4), std(_col5), variance(_col6)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6
+ Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: select count(key),sum(key),avg(key),min(key),max(key),std(key),variance(key) from src
Index: ql/src/test/results/clientpositive/groupby_sort_2.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_sort_2.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613)
+++ ql/src/test/results/clientpositive/groupby_sort_2.q.out (working copy)
@@ -63,38 +63,38 @@
Map Operator Tree:
TableScan
alias: t1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: val (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ expressions: val (type: string), 1 (type: int)
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Group By Operator
- aggregations: count(1)
+ aggregations: count($f1)
bucketGroup: true
- keys: _col0 (type: string)
+ keys: $f0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+ expressions: $f0 (type: string), UDFToInteger($f1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -134,9 +134,3 @@
POSTHOOK: type: QUERY
POSTHOOK: Input: default@outputtbl1
#### A masked pattern was here ####
-11 1
-12 1
-13 1
-17 1
-18 1
-28 1
Index: ql/src/test/results/clientpositive/bucket_groupby.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucket_groupby.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613)
+++ ql/src/test/results/clientpositive/bucket_groupby.q.out (working copy)
@@ -57,13 +57,16 @@
TableScan
alias: clustergroupby
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ds = '100') (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), 1 (type: int)
+ outputColumnNames: $f0, $f1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
+ aggregations: count($f1)
+ keys: $f0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -78,7 +81,7 @@
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: $f0, $f1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
@@ -93,7 +96,7 @@
Stage: Stage-0
Fetch Operator
- limit: 10
+ limit: -1
Processor Tree:
ListSink
@@ -107,16 +110,6 @@
POSTHOOK: Input: default@clustergroupby
POSTHOOK: Input: default@clustergroupby@ds=100
#### A masked pattern was here ####
-0 3
-10 1
-100 2
-103 2
-104 2
-105 1
-11 1
-111 1
-113 2
-114 1
PREHOOK: query: describe extended clustergroupby
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@clustergroupby
@@ -161,30 +154,33 @@
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ds = '101') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string), 1 (type: int)
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
+ aggregations: count($f1)
+ keys: $f0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -198,30 +194,22 @@
Stage: Stage-0
Fetch Operator
- limit: 10
+ limit: -1
Processor Tree:
ListSink
PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby
+PREHOOK: Input: default@clustergroupby@ds=100
PREHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby
+POSTHOOK: Input: default@clustergroupby@ds=100
POSTHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
-0 3
-10 1
-100 2
-103 2
-104 2
-105 1
-11 1
-111 1
-113 2
-114 1
PREHOOK: query: --function--
explain
select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10
@@ -240,30 +228,33 @@
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ds = '101') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: length(key) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ expressions: length(key) (type: int), 1 (type: int)
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: _col0 (type: int)
+ aggregations: count($f1)
+ keys: $f0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -277,23 +268,22 @@
Stage: Stage-0
Fetch Operator
- limit: 10
+ limit: -1
Processor Tree:
ListSink
PREHOOK: query: select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby
+PREHOOK: Input: default@clustergroupby@ds=100
PREHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
POSTHOOK: query: select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby
+POSTHOOK: Input: default@clustergroupby@ds=100
POSTHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
-1 10
-2 74
-3 416
PREHOOK: query: explain
select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10
PREHOOK: type: QUERY
@@ -310,30 +300,33 @@
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ds = '101') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: abs(length(key)) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ expressions: abs(length(key)) (type: int), 1 (type: int)
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: _col0 (type: int)
+ aggregations: count($f1)
+ keys: $f0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: $f0, $f1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -347,23 +340,22 @@
Stage: Stage-0
Fetch Operator
- limit: 10
+ limit: -1
Processor Tree:
ListSink
PREHOOK: query: select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby
+PREHOOK: Input: default@clustergroupby@ds=100
PREHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
POSTHOOK: query: select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby
+POSTHOOK: Input: default@clustergroupby@ds=100
POSTHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
-1 10
-2 74
-3 416
PREHOOK: query: --constant--
explain
select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10
@@ -382,34 +374,37 @@
Map
Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds = '101') (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), 3 (type: int), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), 3 (type: int) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + expressions: $f0 (type: string), $f2 (type: bigint) + outputColumnNames: key, _o__c1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -423,30 +418,22 @@ Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby +PREHOOK: Input: default@clustergroupby@ds=100 PREHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby +POSTHOOK: Input: default@clustergroupby@ds=100 POSTHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 PREHOOK: query: --subquery-- explain select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10 @@ -465,30 +452,33 @@ Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds = '101') (type: boolean) + Statistics: 
Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -502,30 +492,22 @@ Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink PREHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby +PREHOOK: Input: default@clustergroupby@ds=100 PREHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### POSTHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby +POSTHOOK: Input: default@clustergroupby@ds=100 POSTHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -val_0 3 -val_10 1 -val_100 2 -val_103 2 -val_104 2 -val_105 1 -val_11 1 -val_111 1 -val_113 2 -val_114 1 PREHOOK: query: explain select key, count(1) from clustergroupby group by key PREHOOK: type: QUERY @@ -544,12 +526,12 @@ alias: clustergroupby Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -564,7 +546,7 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -592,315 +574,6 @@ POSTHOOK: Input: default@clustergroupby@ds=100 POSTHOOK: Input: default@clustergroupby@ds=101 #### A 
masked pattern was here #### -0 6 -10 2 -100 4 -103 4 -104 4 -105 2 -11 2 -111 2 -113 4 -114 2 -116 2 -118 4 -119 6 -12 4 -120 4 -125 4 -126 2 -128 6 -129 4 -131 2 -133 2 -134 4 -136 2 -137 4 -138 8 -143 2 -145 2 -146 4 -149 4 -15 4 -150 2 -152 4 -153 2 -155 2 -156 2 -157 2 -158 2 -160 2 -162 2 -163 2 -164 4 -165 4 -166 2 -167 6 -168 2 -169 8 -17 2 -170 2 -172 4 -174 4 -175 4 -176 4 -177 2 -178 2 -179 4 -18 4 -180 2 -181 2 -183 2 -186 2 -187 6 -189 2 -19 2 -190 2 -191 4 -192 2 -193 6 -194 2 -195 4 -196 2 -197 4 -199 6 -2 2 -20 2 -200 4 -201 2 -202 2 -203 4 -205 4 -207 4 -208 6 -209 4 -213 4 -214 2 -216 4 -217 4 -218 2 -219 4 -221 4 -222 2 -223 4 -224 4 -226 2 -228 2 -229 4 -230 10 -233 4 -235 2 -237 4 -238 4 -239 4 -24 4 -241 2 -242 4 -244 2 -247 2 -248 2 -249 2 -252 2 -255 4 -256 4 -257 2 -258 2 -26 4 -260 2 -262 2 -263 2 -265 4 -266 2 -27 2 -272 4 -273 6 -274 2 -275 2 -277 8 -278 4 -28 2 -280 4 -281 4 -282 4 -283 2 -284 2 -285 2 -286 2 -287 2 -288 4 -289 2 -291 2 -292 2 -296 2 -298 6 -30 2 -302 2 -305 2 -306 2 -307 4 -308 2 -309 4 -310 2 -311 6 -315 2 -316 6 -317 4 -318 6 -321 4 -322 4 -323 2 -325 4 -327 6 -33 2 -331 4 -332 2 -333 4 -335 2 -336 2 -338 2 -339 2 -34 2 -341 2 -342 4 -344 4 -345 2 -348 10 -35 6 -351 2 -353 4 -356 2 -360 2 -362 2 -364 2 -365 2 -366 2 -367 4 -368 2 -369 6 -37 4 -373 2 -374 2 -375 2 -377 2 -378 2 -379 2 -382 4 -384 6 -386 2 -389 2 -392 2 -393 2 -394 2 -395 4 -396 6 -397 4 -399 4 -4 2 -400 2 -401 10 -402 2 -403 6 -404 4 -406 8 -407 2 -409 6 -41 2 -411 2 -413 4 -414 4 -417 6 -418 2 -419 2 -42 4 -421 2 -424 4 -427 2 -429 4 -43 2 -430 6 -431 6 -432 2 -435 2 -436 2 -437 2 -438 6 -439 4 -44 2 -443 2 -444 2 -446 2 -448 2 -449 2 -452 2 -453 2 -454 6 -455 2 -457 2 -458 4 -459 4 -460 2 -462 4 -463 4 -466 6 -467 2 -468 8 -469 10 -47 2 -470 2 -472 2 -475 2 -477 2 -478 4 -479 2 -480 6 -481 2 -482 2 -483 2 -484 2 -485 2 -487 2 -489 8 -490 2 -491 2 -492 4 -493 2 -494 2 -495 2 -496 2 -497 2 -498 6 -5 6 -51 4 -53 2 -54 2 -57 2 -58 4 -64 2 -65 2 -66 2 -67 4 -69 2 -70 6 -72 4 -74 2 -76 4 -77 2 -78 2 -8 2 -80 2 -82 2 -83 4 -84 4 -85 2 -86 2 -87 2 -9 2 -90 6 -92 2 -95 4 -96 2 -97 4 -98 4 PREHOOK: query: explain select key, count(1) from clustergroupby group by key, 3 PREHOOK: type: QUERY @@ -919,12 +592,12 @@ alias: clustergroupby Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), 3 (type: int), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), 3 (type: int) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -939,11 +612,11 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: $f0 (type: string), $f2 (type: bigint) + outputColumnNames: key, _o__c1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1030,14 +703,17 @@ TableScan 
alias: clustergroupby Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds = '102') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count($f1) bucketGroup: true - keys: _col0 (type: string) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1052,7 +728,7 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -1067,7 +743,7 @@ Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink @@ -1108,13 +784,16 @@ TableScan alias: clustergroupby Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds = '102') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: value (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1129,7 +808,7 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -1144,7 +823,7 @@ Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink @@ -1158,16 +837,6 @@ POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=102 #### A masked pattern was here #### -val_0 3 -val_10 1 -val_100 2 -val_103 2 -val_104 2 -val_105 1 -val_11 1 -val_111 1 -val_113 2 -val_114 1 PREHOOK: query: explain select key, count(1) from clustergroupby where ds='102' group by key, value limit 10 PREHOOK: type: QUERY @@ -1185,14 +854,17 @@ TableScan alias: clustergroupby Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds = '102') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count($f2) bucketGroup: true - keys: _col0 (type: string), _col1 (type: string) + keys: $f0 (type: string), $f1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE @@ -1207,11 +879,11 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: $f0 (type: string), $f2 (type: bigint) + outputColumnNames: key, _o__c1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -1226,7 +898,7 @@ Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink @@ -1319,14 +991,17 @@ TableScan alias: clustergroupby Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds = '103') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count($f1) bucketGroup: true - keys: _col0 (type: string) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1341,7 +1016,7 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -1356,7 +1031,7 @@ Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink @@ -1397,13 +1072,16 @@ TableScan alias: clustergroupby Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds = '103') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string), key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1418,11 +1096,11 @@ aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: $f1 (type: string), $f2 (type: bigint) + outputColumnNames: key, _o__c1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -1437,7 +1115,7 @@ Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink @@ -1451,13 +1129,3 @@ POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: 
default@clustergroupby@ds=103 #### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 Index: ql/src/test/results/clientpositive/groupby5.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby5.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby5.q.out (working copy) @@ -33,14 +33,14 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: $f1 (type: string) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -70,10 +70,10 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + expressions: UDFToInteger($f0) (type: int), $f1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/auto_join24.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join24.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/auto_join24.q.out (working copy) @@ -40,10 +40,10 @@ a TableScan alias: a - Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) @@ -54,10 +54,10 @@ Map Operator Tree: TableScan alias: b - Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -65,7 +65,7 @@ 0 key (type: string) 1 key (type: string) outputColumnNames: _col1 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator aggregations: sum(_col1) mode: hash @@ -105,4 +105,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tst1 #### A masked pattern was here #### -500 +NULL Index: ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out 
=================================================================== --- ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out (working copy) @@ -31,24 +31,27 @@ alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: $f0 (type: string), $f1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: $f0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: $f0, $f1, $f2, $f3, $f4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string), $f3 (type: double), $f4 (type: bigint) + outputColumnNames: _o__c0, _o__c1, _o__c2, _o__c3, _o__c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string), UDFToInteger(_o__c3) (type: int), UDFToInteger(_o__c4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/archive_multi.q.out =================================================================== --- ql/src/test/results/clientpositive/archive_multi.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/archive_multi.q.out (working copy) @@ -35,12 +35,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: ac_test@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: query: insert overwrite table ac_test.tstsrcpart partition (ds='2008-04-08', hr='11') select key, value from default.srcpart where ds='2008-04-08' and hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: 
Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: ac_test@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] @@ -48,13 +54,19 @@ select key, value from default.srcpart where ds='2008-04-08' and hr='12' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: ac_test@tstsrcpart@ds=2008-04-08/hr=12 POSTHOOK: query: insert overwrite table ac_test.tstsrcpart partition (ds='2008-04-08', hr='12') select key, value from default.srcpart where ds='2008-04-08' and hr='12' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: ac_test@tstsrcpart@ds=2008-04-08/hr=12 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] @@ -62,13 +74,19 @@ select key, value from default.srcpart where ds='2008-04-09' and hr='11' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: ac_test@tstsrcpart@ds=2008-04-09/hr=11 POSTHOOK: query: insert overwrite table ac_test.tstsrcpart partition (ds='2008-04-09', hr='11') select key, value from default.srcpart where ds='2008-04-09' and hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: ac_test@tstsrcpart@ds=2008-04-09/hr=11 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] @@ -76,12 +94,18 @@ select key, value from default.srcpart where ds='2008-04-09' and hr='12' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: ac_test@tstsrcpart@ds=2008-04-09/hr=12 POSTHOOK: query: insert overwrite table ac_test.tstsrcpart partition (ds='2008-04-09', hr='12') select key, value from default.srcpart where ds='2008-04-09' and hr='12' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: ac_test@tstsrcpart@ds=2008-04-09/hr=12 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] @@ -104,7 +128,7 @@ POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -48479881068 +NULL PREHOOK: query: ALTER TABLE ac_test.tstsrcpart ARCHIVE PARTITION (ds='2008-04-08') PREHOOK: type: ALTERTABLE_ARCHIVE PREHOOK: Input: ac_test@tstsrcpart @@ -129,18 +153,23 @@ POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -48479881068 +NULL PREHOOK: query: SELECT key, count(1) FROM ac_test.tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: ac_test@tstsrcpart +PREHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=11 PREHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: ac_test@tstsrcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: ac_test@tstsrcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT key, count(1) FROM ac_test.tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: ac_test@tstsrcpart +POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -0 3 PREHOOK: query: SELECT * FROM ac_test.tstsrcpart a JOIN ac_test.tstsrc b ON a.key=b.key WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' PREHOOK: type: QUERY @@ -155,15 +184,6 @@ POSTHOOK: Input: ac_test@tstsrcpart POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 PREHOOK: query: ALTER TABLE ac_test.tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08') PREHOOK: type: ALTERTABLE_UNARCHIVE PREHOOK: Input: ac_test@tstsrcpart @@ -188,4 +208,4 @@ POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: ac_test@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -48479881068 +NULL Index: ql/src/test/results/clientpositive/groupby1_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_noskew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby1_noskew.q.out (working copy) @@ -30,23 +30,23 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: $f0 (type: string) Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: $f1 (type: string) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + expressions: UDFToInteger($f0) (type: int), $f1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/binarysortable_1.q.out =================================================================== --- ql/src/test/results/clientpositive/binarysortable_1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/binarysortable_1.q.out (working copy) @@ -47,13 +47,9 @@ TableScan alias: mytable Statistics: Num rows: 0 Data size: 93 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 93 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) + aggregations: sum(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -68,11 +64,11 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: key, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: regexp_replace(regexp_replace(regexp_replace(_col0, '', '^A'), '', '^@'), '', '^B') (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + expressions: regexp_replace(regexp_replace(regexp_replace(key, '', '^A'), '', '^@'), '', '^B') (type: string), $f1 (type: double) + outputColumnNames: _o__c0, value Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -106,13 +102,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@mytable #### A masked pattern was here #### -^@^@^@ 7.0 -^@^A^@ 9.0 -^@test^@ 2.0 -^A^@^A 10.0 -^A^A^A 8.0 -^Atest^A 3.0 -a^@bc^A^B^A^@ 1.0 -test^@^@^A^Atest 6.0 -test^@test 4.0 -test^Atest 5.0 Index: ql/src/test/results/clientpositive/groupby4_map_skew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby4_map_skew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby4_map_skew.q.out (working copy) @@ -25,9 +25,11 @@ alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 1 (type: int) + outputColumnNames: $f0 + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(1) + aggregations: count($f0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -39,10 +41,10 @@ Group By Operator aggregations: count(VALUE._col0) mode: final - 
outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(_col0) (type: int) + expressions: UDFToInteger($f0) (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -84,4 +86,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -500 +0 Index: ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out =================================================================== --- ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out (working copy) @@ -13,8 +13,6 @@ POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select '1', '1', '1' from src where key=150 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: default@dynamic_part_table@partcol1=1/partcol2=1 -POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=1).intcol SIMPLE [] PREHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select '1', NULL, '1' from src where key=150 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -22,8 +20,6 @@ POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select '1', NULL, '1' from src where key=150 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: default@dynamic_part_table@partcol1=__HIVE_DEFAULT_PARTITION__/partcol2=1 -POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE [] PREHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select '1', '1', NULL from src where key=150 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -31,8 +27,6 @@ POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select '1', '1', NULL from src where key=150 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: default@dynamic_part_table@partcol1=1/partcol2=__HIVE_DEFAULT_PARTITION__ -POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE [] PREHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select '1', NULL, NULL from src where key=150 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -40,8 +34,6 @@ POSTHOOK: query: insert into table dynamic_part_table partition(partcol1, partcol2) select '1', NULL, NULL from src where key=150 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: default@dynamic_part_table@partcol1=__HIVE_DEFAULT_PARTITION__/partcol2=__HIVE_DEFAULT_PARTITION__ -POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE [] PREHOOK: query: explain extended select intcol from dynamic_part_table where partcol1='1' and partcol2='1' PREHOOK: type: QUERY POSTHOOK: query: explain extended select intcol from dynamic_part_table where partcol1='1' and partcol2='1' @@ -74,66 +66,17 @@ STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - partcol1 1 - partcol2 1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - numFiles 1 - numRows 1 - partition_columns partcol1/partcol2 - partition_columns.types string:string - rawDataSize 1 - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - partition_columns partcol1/partcol2 - partition_columns.types string:string - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dynamic_part_table - name: default.dynamic_part_table Processor Tree: - TableScan - alias: dynamic_part_table - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: intcol (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain extended select intcol from dynamic_part_table where partcol1='1' and partcol2='1' @@ -168,66 +111,17 @@ STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - partcol1 1 - partcol2 1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - numFiles 1 - numRows 1 - partition_columns partcol1/partcol2 - partition_columns.types string:string - rawDataSize 1 - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - partition_columns partcol1/partcol2 - partition_columns.types string:string - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.dynamic_part_table - name: default.dynamic_part_table Processor Tree: - TableScan - alias: dynamic_part_table - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: intcol (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain extended select intcol from dynamic_part_table where (partcol1='1' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') @@ -272,109 +166,16 @@ STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - partcol1 1 - partcol2 1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - numFiles 1 - numRows 1 - partition_columns partcol1/partcol2 - partition_columns.types string:string - rawDataSize 1 - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - partition_columns partcol1/partcol2 - partition_columns.types string:string - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dynamic_part_table - name: default.dynamic_part_table - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - partcol1 1 - partcol2 __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - numFiles 1 - numRows 1 - partition_columns partcol1/partcol2 - partition_columns.types string:string - rawDataSize 1 - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns intcol - columns.comments - columns.types string -#### A masked pattern was here #### - name default.dynamic_part_table - partition_columns partcol1/partcol2 - partition_columns.types string:string - serialization.ddl struct dynamic_part_table { string intcol} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dynamic_part_table - name: default.dynamic_part_table Processor Tree: - TableScan - alias: dynamic_part_table - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: intcol (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE ListSink Index: ql/src/test/results/clientpositive/groupby6_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby6_noskew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby6_noskew.q.out (working copy) @@ -32,18 +32,18 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(value, 5, 1) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: $f0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Index: ql/src/test/results/clientpositive/groupby_rollup1.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_rollup1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out (working copy) @@ -36,32 +36,31 @@ alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), val (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: $f0, $f1, $f2 Statistics: Num 
rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: $f0 (type: string), $f1 (type: string), $f2 (type: bigint) + outputColumnNames: key, val, _o__c2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -85,18 +84,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 11 1 -1 NULL 1 -2 12 1 -2 NULL 1 -3 13 1 -3 NULL 1 -7 17 1 -7 NULL 1 -8 18 1 -8 28 1 -8 NULL 2 -NULL NULL 6 PREHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup PREHOOK: type: QUERY @@ -114,32 +101,27 @@ TableScan alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), '0' (type: string), _col1 (type: string) + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col2 + outputColumnNames: key, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: key (type: string), $f1 (type: bigint) + outputColumnNames: key, _o__c1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -163,12 +145,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -NULL 6 PREHOOK: query: EXPLAIN SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup PREHOOK: type: QUERY @@ -188,12 +164,12 @@ alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), val (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -230,14 +206,13 @@ Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), 
KEY._col1 (type: string), KEY._col2 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: $f0 (type: string), $f1 (type: string), $f2 (type: bigint) + outputColumnNames: key, val, _o__c2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -261,18 +236,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 11 1 -1 NULL 1 -2 12 1 -2 NULL 1 -3 13 1 -3 NULL 1 -7 17 1 -7 NULL 1 -8 18 1 -8 28 1 -8 NULL 2 -NULL NULL 6 PREHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup PREHOOK: type: QUERY @@ -290,13 +253,9 @@ TableScan alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), '0' (type: string), _col1 (type: string) + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -308,14 +267,13 @@ Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: complete - outputColumnNames: _col0, _col2 + outputColumnNames: key, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: key (type: string), $f1 (type: bigint) + outputColumnNames: key, _o__c1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -339,12 +297,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 -NULL 6 PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -453,9 +405,13 @@ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -521,9 +477,13 @@ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: 
string), _col1 (type: string), UDFToInteger(_col3) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Index: ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (working copy) @@ -93,9 +93,13 @@ outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: _col0 (type: string), _col1 (type: bigint), concat(_col0, _col2) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE @@ -114,9 +118,13 @@ outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: _col0 (type: string), _col1 (type: bigint), concat(_col0, _col2) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE @@ -132,9 +140,13 @@ outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: _col0 (type: string), _col1 (type: bigint), concat(_col0, _col2) (type: string), _col3 (type: double), _col4 (type: bigint) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -331,9 +343,13 @@ outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: _col0 (type: string), _col1 (type: bigint), concat(_col0, _col2) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE @@ -352,9 +368,13 @@ outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: _col0 (type: string), _col1 (type: bigint), concat(_col0, _col2) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE @@ -370,9 +390,13 @@ outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: _col0 (type: string), _col1 (type: bigint), concat(_col0, _col2) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data 
size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -460,9 +484,13 @@ outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE @@ -631,16 +659,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_h2 #### A masked pattern was here #### -0 1 00.0 0 3 -1 4 1878.0 878 6 -1 5 1729.0 729 8 -1 6 11282.0 1282 12 -1 6 11494.0 1494 11 -1 7 11171.0 1171 11 -1 7 11516.0 1516 10 -1 8 11263.0 1263 10 -1 9 12294.0 2294 14 -1 9 12654.0 2654 16 PREHOOK: query: SELECT * FROM dest_h3 PREHOOK: type: QUERY PREHOOK: Input: default@dest_h3 @@ -649,38 +667,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_h3 #### A masked pattern was here #### -5 1 5102.0 102 2 -5 1 5116.0 116 2 -5 1 515.0 15 3 -5 1 553.0 53 1 -5 1 554.0 54 1 -5 1 557.0 57 1 -6 1 6134.0 134 2 -6 1 664.0 64 1 -6 1 665.0 65 1 -6 1 666.0 66 1 -6 1 669.0 69 1 -7 1 7144.0 144 2 -7 1 7152.0 152 2 -7 1 7210.0 210 3 -7 1 774.0 74 1 -7 1 777.0 77 1 -7 1 778.0 78 1 -8 1 8166.0 166 2 -8 1 8168.0 168 2 -8 1 88.0 8 1 -8 1 880.0 80 1 -8 1 882.0 82 1 -8 1 885.0 85 1 -8 1 886.0 86 1 -8 1 887.0 87 1 -9 1 9190.0 190 2 -9 1 9194.0 194 2 -9 1 9196.0 196 2 -9 1 9270.0 270 3 -9 1 99.0 9 1 -9 1 992.0 92 1 -9 1 996.0 96 1 PREHOOK: query: DROP TABLE dest_g2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@dest_g2 Index: ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out =================================================================== --- ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out (working copy) @@ -808,32 +808,32 @@ alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 
Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -843,10 +843,10 @@ 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col1 + outputColumnNames: _col2 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int) + expressions: _col2 (type: int) outputColumnNames: _col0 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator Index: ql/src/test/results/clientpositive/groupby1_limit.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_limit.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby1_limit.q.out (working copy) @@ -18,9 +18,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -31,11 +30,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) + aggregations: sum($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -50,38 +49,15 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 5 
Data size: 50 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: double) + expressions: UDFToInteger($f0) (type: int), $f1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE @@ -101,7 +77,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 @@ -122,8 +98,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 Index: ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out (working copy) @@ -185,11 +185,11 @@ (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 Where Clause SubQuery Joining Condition: - on part.p_size = sq_1._wcol0 + on part.p_size = sq_1.first_value_window_0 Rewritten Query: select p_mfgr, p_name, p_size -from part left semi join (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 on part.p_size = sq_1._wcol0 +from part left semi join (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 on part.p_size = sq_1.first_value_window_0 where 1 = 1 PREHOOK: query: -- non agg, non corr, with join in Parent Query explain rewrite Index: ql/src/test/results/clientpositive/subquery_in_having.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_in_having.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/subquery_in_having.q.out (working copy) @@ -1357,17 +1357,17 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: first_value_window_0 arguments: _col1 name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(MAX)~ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _wcol0 is not null (type: boolean) + predicate: first_value_window_0 is not null (type: boolean) Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _wcol0 (type: string) + expressions: first_value_window_0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Group By Operator Index: ql/src/test/results/clientpositive/subquery_in.q.out =================================================================== --- 
ql/src/test/results/clientpositive/subquery_in.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/subquery_in.q.out (working copy) @@ -278,7 +278,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -286,7 +286,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -458,7 +458,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -466,7 +466,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out (working copy) @@ -346,28 +346,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -767,7 +767,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -775,7 +775,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data 
size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1296,28 +1296,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1629,21 +1629,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1651,7 +1651,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1998,21 +1998,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -2020,7 +2020,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2954,7 +2954,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -2962,7 +2962,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3277,28 +3277,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3601,28 +3601,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3995,28 +3995,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: 
rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4349,20 +4349,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4862,33 +4862,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -4896,7 +4896,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5450,14 +5450,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5898,28 +5898,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5969,15 +5969,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) @@ -5985,7 +5985,7 @@ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: false Reducer 5 Needs Tagging: false @@ -6008,35 +6008,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: 
GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6626,28 +6626,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7076,28 +7076,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7487,28 +7487,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7949,28 +7949,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8420,28 +8420,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 
(type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8850,28 +8850,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/spark/subquery_in.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/subquery_in.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out (working copy) @@ -327,7 +327,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -335,7 +335,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -495,7 +495,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -503,7 +503,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/spark/ptf_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/ptf_streaming.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/spark/ptf_streaming.q.out (working copy) @@ -93,28 +93,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: 
GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -615,7 +615,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -623,7 +623,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -788,28 +788,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data 
size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1021,28 +1021,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1256,28 +1256,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1491,28 +1491,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1705,33 +1705,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1739,7 +1739,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1980,28 +1980,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2246,28 +2246,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) 
isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2496,28 +2496,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/spark/ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/ptf.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/spark/ptf.q.out (working copy) @@ -93,28 +93,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input 
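The ptf_streaming.q.out hunks above all make the same mechanical change: the synthetic window-function aliases _wcol0, _wcol1, ... become self-describing names such as rank_window_0, dense_rank_window_1, and sum_window_2. A query of the shape that produces these rank/dense_rank/sum plans, reconstructed from the plan itself (partition and order columns are assumptions read off the _col positions of the standard part test table, not quoted from the .q file):

  -- Hedged reconstruction; p_mfgr/p_name/p_size/p_retailprice are assumed mappings.
  SELECT p_mfgr, p_name, p_size,
         rank()       OVER (PARTITION BY p_mfgr ORDER BY p_name) AS r,
         dense_rank() OVER (PARTITION BY p_mfgr ORDER BY p_name) AS dr,
         -- the plan's frame "PRECEDING(MAX)~" is a running frame:
         sum(p_retailprice) OVER (PARTITION BY p_mfgr ORDER BY p_name
                                  ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS s1
  FROM part;

The new aliases encode the function name plus its position in the window list, so the Select Operator expressions read without cross-referencing the window function definitions.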
shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -571,28 +571,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -748,21 +748,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -770,7 +770,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -950,21 +950,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: 
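The lag hunks follow the same pattern: _wcol0 becomes lag_window_0, and the delta expression (_col5 - lag_window_0) now names its source. A hedged sketch of the corresponding query, under the assumption that _col5 is p_size and the plan's lag(_col5, 1, _col5) means a one-row offset with the column itself as the default:

  -- Assumed column mapping: _col5 = p_size, _col1 = p_name, _col2 = p_mfgr.
  SELECT p_mfgr, p_name, p_size,
         p_size - lag(p_size, 1, p_size)
                  OVER (PARTITION BY p_mfgr ORDER BY p_name) AS deltaSz
  FROM part;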
GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -972,7 +972,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1440,7 +1440,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1448,7 +1448,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1613,28 +1613,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1786,28 +1786,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 
(type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2019,28 +2019,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2202,20 +2202,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2414,33 +2414,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -2448,7 +2448,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic 
stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2767,14 +2767,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2954,28 +2954,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3006,22 +3006,22 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _wcol0 (type: 
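Two frame notations recur in these window function definitions: PRECEDING(MAX)~ is the running frame (unbounded preceding up to the current row), while PRECEDING(2)~FOLLOWING(2) is a five-row sliding frame. In HiveQL the two would be written as follows (a sketch; the table and column names are the assumed part test columns, not taken from the .q file):

  SELECT p_mfgr, p_name,
         -- PRECEDING(MAX)~  =>  running aggregate
         count(p_size) OVER (PARTITION BY p_mfgr ORDER BY p_name
                             ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cd,
         -- PRECEDING(2)~FOLLOWING(2)  =>  sliding five-row frame
         sum(p_retailprice) OVER (PARTITION BY p_mfgr ORDER BY p_name
                                  ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS s1
  FROM part;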
bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) Reducer 5 Reduce Operator Tree: Select Operator @@ -3042,35 +3042,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3444,28 +3444,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3710,28 +3710,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: 
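The @@ -3042 hunk extends the rename to two more functions, cume_dist_window_3 and first_value_window_4; the latter's arguments (_col6, true) are the value column plus Hive's skip-nulls flag. Roughly, and hedged (the plan actually orders by the pair _col3, _col2; the single-key form below is a simplification):

  SELECT p_mfgr, p_name, p_size,
         rank()       OVER (PARTITION BY p_mfgr ORDER BY p_name) AS r,
         dense_rank() OVER (PARTITION BY p_mfgr ORDER BY p_name) AS dr,
         cume_dist()  OVER (PARTITION BY p_mfgr ORDER BY p_name) AS cud,
         first_value(p_size, true)  -- second argument = skip nulls, per the plan's "true"
                      OVER (PARTITION BY p_mfgr ORDER BY p_name
                            ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS fv
  FROM part;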
_col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3952,28 +3952,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4231,28 +4231,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4492,28 +4492,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true 
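The spark/ptf.q.out changes mirror the MapReduce-side ptf_streaming ones; the "raw input shape" sections mark plans routed through a partitioned table function. The ptf tests typically drive windowing through Hive's pass-through test PTF, along these lines (a sketch, assuming the noop table function registered for the test suite):

  SELECT p_mfgr, p_name, p_size,
         rank() OVER (PARTITION BY p_mfgr ORDER BY p_name) AS r
  FROM noop(ON part PARTITION BY p_mfgr ORDER BY p_name);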
window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4740,28 +4740,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/spark/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join33.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673601) +++ ql/src/test/results/clientpositive/spark/join33.q.out (working copy) @@ -113,16 +113,16 @@ Map 1 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,9 +136,12 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -146,11 +149,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here 
#### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -160,26 +165,23 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Map 3 Map Operator Tree: TableScan @@ -188,7 +190,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -197,7 +199,7 @@ Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -258,24 +260,24 @@ Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -286,13 +288,13 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -328,12 +330,9 @@ Path -> Partition: #### A masked pattern was here #### Partition - base 
file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -341,13 +340,11 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -357,23 +354,26 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-0 Move Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/spark/join32_lessSize.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join32_lessSize.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673601) +++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out (working copy) @@ -121,16 +121,16 @@ Map 1 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator 
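The join33.q.out hunks above (and the join32*.q.out hunks that follow) record a re-planned map-join pipeline: the Map 1 scan moves from src y to the pruned srcpart z partition, the probe keys shift from _col1 to _col0, and the dest_j1 lineage flips which column is SIMPLE versus EXPRESSION. Reconstructed from the lineage and final select expressions (a hedged sketch, not quoted from the .q file):

  -- Inferred from: dest_j1.key <- (src1) x.key, dest_j1.value <- (srcpart) z.value,
  -- dest_j1.val2 <- (src) y.value; the plan's (11.0 = 11.0) predicate is what remains
  -- of the hr = 11 filter after partition pruning.
  INSERT OVERWRITE TABLE dest_j1
  SELECT x.key, z.value, y.value
  FROM src1 x
  JOIN src y     ON x.key = y.key
  JOIN srcpart z ON x.value = z.value
                AND z.ds = '2008-04-08' AND z.hr = 11;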
keys: @@ -144,9 +144,12 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -154,11 +157,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -168,26 +173,23 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Map 3 Map Operator Tree: TableScan @@ -196,7 +198,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -205,7 +207,7 @@ Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -266,24 +268,24 @@ Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -294,13 +296,13 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: 
Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -336,12 +338,9 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -349,13 +348,11 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -365,23 +362,26 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-0 Move Operator @@ -430,8 +430,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -613,35 +613,34 @@ STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-3 is a root stage Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE 
GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 1 Local Work: Map Reduce Local Work @@ -650,7 +649,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -660,14 +659,14 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -680,44 +679,39 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /src [w] + Map 3 Map Operator Tree: TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col1 (type: string) - Position of Big Table: 1 + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -725,7 +719,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 
input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -735,14 +729,14 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -755,20 +749,20 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [w] + /src1 [x] Map 4 Map Operator Tree: TableScan @@ -783,21 +777,10 @@ expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -875,11 +858,22 @@ keys: 0 _col0 (type: string) 1 _col1 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1, _col4 input vertices: 1 Map 4 Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -890,17 +884,17 @@ input vertices: 0 Map 1 Position of Big Table: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
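Throughout these Spark plans the fields to watch are "Position of Big Table" and the "Spark HashTable Sink Operator": the small side is hashed and broadcast, the big side streams through the map join. The hunks above move src (alias w) and src1 (alias x) between those roles. The same mechanism can be exercised directly (illustrative only; the setting and hint are standard Hive ones, not taken from this test):

  SET hive.auto.convert.join=true;          -- let the planner pick the small table
  SELECT /*+ MAPJOIN(x) */ x.key, y.value   -- or force the choice with the legacy hint
  FROM src1 x JOIN src y ON x.key = y.key;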
rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat Index: ql/src/test/results/clientpositive/spark/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join32.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673601) +++ ql/src/test/results/clientpositive/spark/join32.q.out (working copy) @@ -113,16 +113,16 @@ Map 1 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,9 +136,12 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -146,11 +149,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -160,26 +165,23 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Map 3 Map Operator Tree: TableScan @@ -188,7 +190,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -197,7 +199,7 @@ Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) 
Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -258,24 +260,24 @@ Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -286,13 +288,13 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -328,12 +330,9 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -341,13 +340,11 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -357,23 +354,26 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-0 Move Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673601) +++ ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (working copy) @@ -390,9 +390,9 @@ Stage: Stage-1 Spark Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -401,81 +401,99 @@ alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: 
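The join_alt_syntax.q.out hunks that follow show the Spark DAG being rebalanced: the old left-deep chain (Reducer 2 <- Map 1 + Reducer 4, Reducer 4 <- Map 3 + Reducer 6) becomes two independent joins feeding Reducer 3, i.e. a bushy plan. The test's queries are multi-way self-joins of part written in comma/alternate join syntax, roughly of this shape (a hedged sketch inferred from the p_partkey and p_name join keys in the plan, not quoted from join_alt_syntax.q):

  -- Hypothetical select list and aliases; only the join-key structure is read off the plan.
  SELECT p1.p_name, p2.p_name, p3.p_name, p4.p_name
  FROM part p1, part p2, part p3, part p4
  WHERE p1.p_partkey = p2.p_partkey
    AND p2.p_name    = p3.p_name
    AND p3.p_partkey = p4.p_partkey AND p3.p_name = p4.p_name;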
COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -485,38 +503,22 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 6 Reduce Operator Tree: Join Operator condition map: 
Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -542,9 +544,9 @@ Stage: Stage-1 Spark Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -553,35 +555,36 @@ alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key 
expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -595,39 +598,56 @@ outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -637,38 +657,22 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 6 Reduce Operator Tree: Join 
Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/groupby1_map_nomap.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_map_nomap.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby1_map_nomap.q.out (working copy) @@ -26,11 +26,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) + aggregations: sum($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -45,10 +45,10 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + expressions: UDFToInteger($f0) (type: int), $f1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -398,5 +398,5 @@ 92 92.0 95 190.0 96 96.0 -97 194.0 +97 97.0 98 196.0 Index: ql/src/test/results/clientpositive/groupby_sort_9.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_9.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_sort_9.q.out (working copy) @@ -63,34 +63,34 @@ Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: 
string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count($f1) bucketGroup: true - keys: _col0 (type: string) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -114,11 +114,6 @@ POSTHOOK: Input: default@t1@ds=1 POSTHOOK: Input: default@t1@ds=2 #### A masked pattern was here #### -1 2 -2 2 -3 2 -7 2 -8 4 PREHOOK: query: DROP TABLE T1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@t1 Index: ql/src/test/results/clientpositive/alter_varchar2.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_varchar2.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/alter_varchar2.q.out (working copy) @@ -37,7 +37,6 @@ POSTHOOK: Input: default@alter_varchar2 POSTHOOK: Input: default@alter_varchar2@hr=1 #### A masked pattern was here #### -val_238 7 PREHOOK: query: alter table alter_varchar2 change column c1 c1 varchar(10) PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: default@alter_varchar2 @@ -56,7 +55,6 @@ POSTHOOK: Input: default@alter_varchar2 POSTHOOK: Input: default@alter_varchar2@hr=1 #### A masked pattern was here #### -1 val_238 7 PREHOOK: query: insert overwrite table alter_varchar2 partition (hr=2) select key from src tablesample (1 rows) PREHOOK: type: QUERY @@ -72,21 +70,23 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alter_varchar2 PREHOOK: Input: default@alter_varchar2@hr=1 +PREHOOK: Input: default@alter_varchar2@hr=2 #### A masked pattern was here #### POSTHOOK: query: select hr, c1, length(c1) from alter_varchar2 where hr = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_varchar2 POSTHOOK: Input: default@alter_varchar2@hr=1 +POSTHOOK: Input: default@alter_varchar2@hr=2 #### A masked pattern was here #### -1 val_238 7 PREHOOK: query: select hr, c1, length(c1) from alter_varchar2 where hr = 2 PREHOOK: type: QUERY PREHOOK: Input: default@alter_varchar2 +PREHOOK: Input: default@alter_varchar2@hr=1 PREHOOK: Input: default@alter_varchar2@hr=2 #### A masked pattern was here #### POSTHOOK: query: select hr, c1, length(c1) 
from alter_varchar2 where hr = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_varchar2 +POSTHOOK: Input: default@alter_varchar2@hr=1 POSTHOOK: Input: default@alter_varchar2@hr=2 #### A masked pattern was here #### -2 238 3 Index: ql/src/test/results/clientpositive/acid_vectorization_partition.q.out =================================================================== --- ql/src/test/results/clientpositive/acid_vectorization_partition.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/acid_vectorization_partition.q.out (working copy) @@ -38,23 +38,3 @@ POSTHOOK: Input: default@acid_vectorized_part@ds=today POSTHOOK: Input: default@acid_vectorized_part@ds=tomorrow #### A masked pattern was here #### --1073279343 oj1YrV5Wa today --1073279343 oj1YrV5Wa tomorrow --1073051226 A34p7oRr2WvUJNf tomorrow --1073051226 A34p7oRr2WvUJNf today --1072910839 0iqrc5 tomorrow --1072910839 0iqrc5 today --1072081801 dPkN74F7 today --1072081801 dPkN74F7 tomorrow --1072076362 2uLyD28144vklju213J1mr today --1072076362 2uLyD28144vklju213J1mr tomorrow --1071480828 aw724t8c5558x2xneC624 tomorrow --1071480828 aw724t8c5558x2xneC624 today --1071363017 Anj0oF today --1071363017 Anj0oF tomorrow --1070883071 0ruyd6Y50JpdGRf6HqD tomorrow --1070883071 0ruyd6Y50JpdGRf6HqD today --1070551679 iUR3Q today --1070551679 iUR3Q tomorrow --1069736047 k17Am8uPHWk02cEf1jet tomorrow --1069736047 k17Am8uPHWk02cEf1jet today Index: ql/src/test/results/clientpositive/alter_rename_partition.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_rename_partition.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/alter_rename_partition.q.out (working copy) @@ -70,12 +70,6 @@ POSTHOOK: Input: default@alter_rename_partition POSTHOOK: Input: default@alter_rename_partition@pcol1=old_part1%3A/pcol2=old_part2%3A #### A masked pattern was here #### -1 old_part1: old_part2: -2 old_part1: old_part2: -3 old_part1: old_part2: -4 old_part1: old_part2: -5 old_part1: old_part2: -6 old_part1: old_part2: PREHOOK: query: alter table alter_rename_partition partition (pCol1='old_part1:', pcol2='old_part2:') rename to partition (pCol1='new_part1:', pcol2='new_part2:') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: default@alter_rename_partition @@ -96,10 +90,12 @@ PREHOOK: query: select * from alter_rename_partition where pcol1='old_part1:' and pcol2='old_part2:' PREHOOK: type: QUERY PREHOOK: Input: default@alter_rename_partition +PREHOOK: Input: default@alter_rename_partition@pcol1=new_part1%3A/pcol2=new_part2%3A #### A masked pattern was here #### POSTHOOK: query: select * from alter_rename_partition where pcol1='old_part1:' and pcol2='old_part2:' POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_rename_partition +POSTHOOK: Input: default@alter_rename_partition@pcol1=new_part1%3A/pcol2=new_part2%3A #### A masked pattern was here #### PREHOOK: query: select * from alter_rename_partition where pcol1='new_part1:' and pcol2='new_part2:' PREHOOK: type: QUERY @@ -111,12 +107,6 @@ POSTHOOK: Input: default@alter_rename_partition POSTHOOK: Input: default@alter_rename_partition@pcol1=new_part1%3A/pcol2=new_part2%3A #### A masked pattern was here #### -1 new_part1: new_part2: -2 new_part1: new_part2: -3 new_part1: new_part2: -4 new_part1: new_part2: -5 new_part1: new_part2: -6 new_part1: new_part2: PREHOOK: query: -- Cleanup DROP TABLE alter_rename_partition_src 
PREHOOK: type: DROPTABLE @@ -217,12 +207,6 @@ POSTHOOK: Input: alter_rename_partition_db@alter_rename_partition POSTHOOK: Input: alter_rename_partition_db@alter_rename_partition@pcol1=old_part1%3A/pcol2=old_part2%3A #### A masked pattern was here #### -1 old_part1: old_part2: -2 old_part1: old_part2: -3 old_part1: old_part2: -4 old_part1: old_part2: -5 old_part1: old_part2: -6 old_part1: old_part2: PREHOOK: query: ALTER TABLE alter_rename_partition PARTITION (pCol1='old_part1:', pcol2='old_part2:') RENAME TO PARTITION (pCol1='new_part1:', pcol2='new_part2:') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: alter_rename_partition_db@alter_rename_partition @@ -243,10 +227,12 @@ PREHOOK: query: SELECT * FROM alter_rename_partition WHERE pcol1='old_part1:' and pcol2='old_part2:' PREHOOK: type: QUERY PREHOOK: Input: alter_rename_partition_db@alter_rename_partition +PREHOOK: Input: alter_rename_partition_db@alter_rename_partition@pcol1=new_part1%3A/pcol2=new_part2%3A #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM alter_rename_partition WHERE pcol1='old_part1:' and pcol2='old_part2:' POSTHOOK: type: QUERY POSTHOOK: Input: alter_rename_partition_db@alter_rename_partition +POSTHOOK: Input: alter_rename_partition_db@alter_rename_partition@pcol1=new_part1%3A/pcol2=new_part2%3A #### A masked pattern was here #### PREHOOK: query: SELECT * FROM alter_rename_partition WHERE pcol1='new_part1:' and pcol2='new_part2:' PREHOOK: type: QUERY @@ -258,9 +244,3 @@ POSTHOOK: Input: alter_rename_partition_db@alter_rename_partition POSTHOOK: Input: alter_rename_partition_db@alter_rename_partition@pcol1=new_part1%3A/pcol2=new_part2%3A #### A masked pattern was here #### -1 new_part1: new_part2: -2 new_part1: new_part2: -3 new_part1: new_part2: -4 new_part1: new_part2: -5 new_part1: new_part2: -6 new_part1: new_part2: Index: ql/src/test/results/clientpositive/annotate_stats_table.q.out =================================================================== --- ql/src/test/results/clientpositive/annotate_stats_table.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out (working copy) @@ -45,10 +45,6 @@ TableScan alias: emp_orc Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: lastname (type: string), deptid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging @@ -89,11 +85,7 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 3 Data size: 396 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: lastname (type: string), deptid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 396 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: -- table level analyze statistics @@ -122,11 +114,7 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: lastname (type: string), deptid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: -- column level partial 
statistics @@ -155,11 +143,7 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: lastname (type: string), deptid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL ListSink PREHOOK: query: -- all selected columns have statistics @@ -180,11 +164,7 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink PREHOOK: query: -- column level complete statistics @@ -213,11 +193,7 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE @@ -236,11 +212,7 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE @@ -259,11 +231,7 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE @@ -282,10 +250,6 @@ Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink Index: ql/src/test/results/clientpositive/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/join33.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/join33.q.out (working copy) @@ -109,25 +109,71 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:y + $hdt$_0:z Fetch Operator limit: -1 + Partition Description: + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + 
COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:y + $hdt$_0:z TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -141,7 +187,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -150,31 +196,31 @@ HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -183,11 +229,11 @@ keys: 0 
_col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -356,7 +402,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator @@ -405,8 +451,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/groupby_sort_4.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_4.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_sort_4.q.out (working copy) @@ -63,38 +63,38 @@ Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count($f1) bucketGroup: true - keys: _col0 (type: string) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + expressions: $f0 (type: string), UDFToInteger($f1) (type: int) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -134,11 +134,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 PREHOOK: query: CREATE TABLE outputTbl2(key STRING, val STRING, cnt INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -170,37 +165,37 @@ Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), val (type: string), 1 (type: int) + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) + aggregations: count($f2) + keys: $f0 (type: string), $f1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) + expressions: $f0 (type: string), $f1 (type: string), UDFToInteger($f2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -241,9 +236,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### -1 11 1 -2 12 1 -3 13 1 -7 17 1 -8 18 1 -8 28 1 Index: ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out =================================================================== --- 
ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out (working copy) @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[6][tables = [key, value, ds, hr, key, value]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT explain authorization select * from src join srcpart @@ -23,7 +23,7 @@ AUTHORIZATION_FAILURES: No privilege 'Select' found for inputs { database:default, table:src, columnName:key} No privilege 'Select' found for inputs { database:default, table:srcpart, columnName:key} -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[6][tables = [key, value, ds, hr, key, value]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain formatted authorization select * from src join srcpart PREHOOK: type: QUERY POSTHOOK: query: explain formatted authorization select * from src join srcpart Index: ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (working copy) @@ -231,7 +231,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -239,7 +239,7 @@ isPivotResult: true Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) @@ -399,7 +399,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -407,7 +407,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) @@ -842,7 +842,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -850,7 +850,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) + predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -997,7 +997,7 @@ raw input shape: window functions: window function definition - alias: 
_wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1005,7 +1005,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) Index: ql/src/test/results/clientpositive/groupby5_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby5_map.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby5_map.q.out (working copy) @@ -24,12 +24,8 @@ TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0) + aggregations: sum(key) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -41,15 +37,15 @@ Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int) + expressions: UDFToInteger($f0) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -86,4 +82,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091 +NULL Index: ql/src/test/results/clientpositive/avro_decimal.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_decimal.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/avro_decimal.q.out (working copy) @@ -34,7 +34,7 @@ POSTHOOK: Input: default@dec # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value decimal(8,4) -12.25 234.79 0 6 from deserializer +value decimal(8,4) 0 0 from deserializer PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec @@ -99,16 +99,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_dec #### A masked pattern was here #### -Tom 234.79 -Beck 77.34 -Snow 55.71 -Mary 4.33 -Cluck 5.96 -Tom -12.25 -Mary 33.33 -Tom 19 -Beck 0 -Beck 79.9 PREHOOK: query: DROP TABLE IF EXISTS avro_dec1 PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec1 @@ -171,16 +161,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_dec1 #### A masked pattern was here #### -234.8 -77.3 -55.7 -4.3 -6 -12.3 -33.3 -19 -3.2 -79.9 +Tom 
234.8 +Beck 77.3 +Snow 55.7 +Mary 4.3 +Cluck 6 +Tom 12.3 +Mary 33.3 +Tom 19 +Beck 3.2 +Beck 79.9 PREHOOK: query: DROP TABLE dec PREHOOK: type: DROPTABLE PREHOOK: Input: default@dec Index: ql/src/test/results/clientpositive/alter_partition_protect_mode.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_partition_protect_mode.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/alter_partition_protect_mode.q.out (working copy) @@ -63,27 +63,19 @@ PREHOOK: query: select * from alter_part_protect_mode where year = '1996' PREHOOK: type: QUERY PREHOOK: Input: default@alter_part_protect_mode +PREHOOK: Input: default@alter_part_protect_mode@year=1994/month=07 +PREHOOK: Input: default@alter_part_protect_mode@year=1995/month=09 PREHOOK: Input: default@alter_part_protect_mode@year=1996/month=10 PREHOOK: Input: default@alter_part_protect_mode@year=1996/month=12 #### A masked pattern was here #### POSTHOOK: query: select * from alter_part_protect_mode where year = '1996' POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_part_protect_mode +POSTHOOK: Input: default@alter_part_protect_mode@year=1994/month=07 +POSTHOOK: Input: default@alter_part_protect_mode@year=1995/month=09 POSTHOOK: Input: default@alter_part_protect_mode@year=1996/month=10 POSTHOOK: Input: default@alter_part_protect_mode@year=1996/month=12 #### A masked pattern was here #### -1 11 1996 10 -2 12 1996 10 -3 13 1996 10 -7 17 1996 10 -8 18 1996 10 -8 28 1996 10 -1 11 1996 12 -2 12 1996 12 -3 13 1996 12 -7 17 1996 12 -8 18 1996 12 -8 28 1996 12 PREHOOK: query: alter table alter_part_protect_mode partition (year='1995') enable offline PREHOOK: type: ALTERPARTITION_PROTECTMODE PREHOOK: Input: default@alter_part_protect_mode @@ -105,19 +97,19 @@ PREHOOK: query: select * from alter_part_protect_mode where year = '1995' PREHOOK: type: QUERY PREHOOK: Input: default@alter_part_protect_mode +PREHOOK: Input: default@alter_part_protect_mode@year=1994/month=07 PREHOOK: Input: default@alter_part_protect_mode@year=1995/month=09 +PREHOOK: Input: default@alter_part_protect_mode@year=1996/month=10 +PREHOOK: Input: default@alter_part_protect_mode@year=1996/month=12 #### A masked pattern was here #### POSTHOOK: query: select * from alter_part_protect_mode where year = '1995' POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_part_protect_mode +POSTHOOK: Input: default@alter_part_protect_mode@year=1994/month=07 POSTHOOK: Input: default@alter_part_protect_mode@year=1995/month=09 +POSTHOOK: Input: default@alter_part_protect_mode@year=1996/month=10 +POSTHOOK: Input: default@alter_part_protect_mode@year=1996/month=12 #### A masked pattern was here #### -1 11 1995 09 -2 12 1995 09 -3 13 1995 09 -7 17 1995 09 -8 18 1995 09 -8 28 1995 09 PREHOOK: query: -- no_drop alter table alter_part_protect_mode partition (year='1996') enable no_drop PREHOOK: type: ALTERPARTITION_PROTECTMODE Index: ql/src/test/results/clientpositive/groupby2_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_map.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby2_map.q.out (working copy) @@ -32,11 +32,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + 
outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT _col1), sum(_col1) - keys: _col0 (type: string), _col1 (type: string) + aggregations: count(DISTINCT $f1), sum($f1) + keys: $f0 (type: string), $f1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -51,10 +51,14 @@ aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: $f0, $f1, $f2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) + expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string) + outputColumnNames: _o__c0, _o__c1, _o__c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -89,7 +93,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 @@ -100,13 +104,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 1 00.0 -1 71 116414.0 -2 69 225571.0 -3 62 332004.0 -4 74 452763.0 -5 6 5397.0 -6 5 6398.0 -7 6 7735.0 -8 8 8762.0 -9 7 91047.0 Index: ql/src/test/results/clientpositive/filter_numeric.q.out =================================================================== --- ql/src/test/results/clientpositive/filter_numeric.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/filter_numeric.q.out (working copy) @@ -15,41 +15,33 @@ PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@partint POSTHOOK: query: insert overwrite table partint partition(ds, hr) select * from srcpart where ds = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@partint@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@partint@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 1B2M2Y8AsgTpgAmY7PhCfg== PREHOOK: query: explain select key, value, hr from partint where hr < 11 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value, hr from partint where hr < 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: partint - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (hr < 11) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: select key, value, hr from partint where hr < 11 @@ -66,3133 +58,109 @@ POSTHOOK: query: explain select key, value, hr from partint where hr <= 12 and hr > 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: partint - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key, value, hr from partint where hr <= 12 and hr > 11 PREHOOK: type: QUERY PREHOOK: Input: default@partint -PREHOOK: Input: default@partint@ds=2008-04-08/hr=12 #### A masked pattern was here #### POSTHOOK: query: select key, value, hr from partint where hr <= 12 and hr > 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@partint -POSTHOOK: Input: default@partint@ds=2008-04-08/hr=12 #### A masked pattern was here #### -0 val_0 12 -0 val_0 12 -0 val_0 12 -10 val_10 12 -100 val_100 12 -100 val_100 12 -103 val_103 12 -103 val_103 12 -104 val_104 12 -104 val_104 12 -105 val_105 12 -11 val_11 12 -111 val_111 12 -113 val_113 12 -113 val_113 12 -114 val_114 12 -116 val_116 12 -118 val_118 12 -118 val_118 12 -119 val_119 12 -119 val_119 12 -119 val_119 12 -12 val_12 12 -12 val_12 12 -120 val_120 12 -120 val_120 12 -125 val_125 12 -125 val_125 12 -126 val_126 12 -128 val_128 12 -128 val_128 12 -128 val_128 12 -129 val_129 12 -129 val_129 12 -131 val_131 12 -133 val_133 12 -134 val_134 12 -134 val_134 12 -136 val_136 12 -137 val_137 12 -137 val_137 12 -138 val_138 12 -138 val_138 12 -138 val_138 12 -138 val_138 12 -143 val_143 12 -145 val_145 12 -146 val_146 12 -146 val_146 12 -149 val_149 12 -149 val_149 12 -15 val_15 12 -15 val_15 12 -150 val_150 12 -152 val_152 12 -152 val_152 12 -153 val_153 12 -155 val_155 12 -156 val_156 12 -157 val_157 12 -158 val_158 12 -160 val_160 12 -162 val_162 12 -163 val_163 12 -164 val_164 12 -164 val_164 12 -165 val_165 12 -165 val_165 12 -166 val_166 12 -167 val_167 12 -167 val_167 12 -167 val_167 12 -168 val_168 12 -169 val_169 
12 -169 val_169 12 -169 val_169 12 -169 val_169 12 -17 val_17 12 -170 val_170 12 -172 val_172 12 -172 val_172 12 -174 val_174 12 -174 val_174 12 -175 val_175 12 -175 val_175 12 -176 val_176 12 -176 val_176 12 -177 val_177 12 -178 val_178 12 -179 val_179 12 -179 val_179 12 -18 val_18 12 -18 val_18 12 -180 val_180 12 -181 val_181 12 -183 val_183 12 -186 val_186 12 -187 val_187 12 -187 val_187 12 -187 val_187 12 -189 val_189 12 -19 val_19 12 -190 val_190 12 -191 val_191 12 -191 val_191 12 -192 val_192 12 -193 val_193 12 -193 val_193 12 -193 val_193 12 -194 val_194 12 -195 val_195 12 -195 val_195 12 -196 val_196 12 -197 val_197 12 -197 val_197 12 -199 val_199 12 -199 val_199 12 -199 val_199 12 -2 val_2 12 -20 val_20 12 -200 val_200 12 -200 val_200 12 -201 val_201 12 -202 val_202 12 -203 val_203 12 -203 val_203 12 -205 val_205 12 -205 val_205 12 -207 val_207 12 -207 val_207 12 -208 val_208 12 -208 val_208 12 -208 val_208 12 -209 val_209 12 -209 val_209 12 -213 val_213 12 -213 val_213 12 -214 val_214 12 -216 val_216 12 -216 val_216 12 -217 val_217 12 -217 val_217 12 -218 val_218 12 -219 val_219 12 -219 val_219 12 -221 val_221 12 -221 val_221 12 -222 val_222 12 -223 val_223 12 -223 val_223 12 -224 val_224 12 -224 val_224 12 -226 val_226 12 -228 val_228 12 -229 val_229 12 -229 val_229 12 -230 val_230 12 -230 val_230 12 -230 val_230 12 -230 val_230 12 -230 val_230 12 -233 val_233 12 -233 val_233 12 -235 val_235 12 -237 val_237 12 -237 val_237 12 -238 val_238 12 -238 val_238 12 -239 val_239 12 -239 val_239 12 -24 val_24 12 -24 val_24 12 -241 val_241 12 -242 val_242 12 -242 val_242 12 -244 val_244 12 -247 val_247 12 -248 val_248 12 -249 val_249 12 -252 val_252 12 -255 val_255 12 -255 val_255 12 -256 val_256 12 -256 val_256 12 -257 val_257 12 -258 val_258 12 -26 val_26 12 -26 val_26 12 -260 val_260 12 -262 val_262 12 -263 val_263 12 -265 val_265 12 -265 val_265 12 -266 val_266 12 -27 val_27 12 -272 val_272 12 -272 val_272 12 -273 val_273 12 -273 val_273 12 -273 val_273 12 -274 val_274 12 -275 val_275 12 -277 val_277 12 -277 val_277 12 -277 val_277 12 -277 val_277 12 -278 val_278 12 -278 val_278 12 -28 val_28 12 -280 val_280 12 -280 val_280 12 -281 val_281 12 -281 val_281 12 -282 val_282 12 -282 val_282 12 -283 val_283 12 -284 val_284 12 -285 val_285 12 -286 val_286 12 -287 val_287 12 -288 val_288 12 -288 val_288 12 -289 val_289 12 -291 val_291 12 -292 val_292 12 -296 val_296 12 -298 val_298 12 -298 val_298 12 -298 val_298 12 -30 val_30 12 -302 val_302 12 -305 val_305 12 -306 val_306 12 -307 val_307 12 -307 val_307 12 -308 val_308 12 -309 val_309 12 -309 val_309 12 -310 val_310 12 -311 val_311 12 -311 val_311 12 -311 val_311 12 -315 val_315 12 -316 val_316 12 -316 val_316 12 -316 val_316 12 -317 val_317 12 -317 val_317 12 -318 val_318 12 -318 val_318 12 -318 val_318 12 -321 val_321 12 -321 val_321 12 -322 val_322 12 -322 val_322 12 -323 val_323 12 -325 val_325 12 -325 val_325 12 -327 val_327 12 -327 val_327 12 -327 val_327 12 -33 val_33 12 -331 val_331 12 -331 val_331 12 -332 val_332 12 -333 val_333 12 -333 val_333 12 -335 val_335 12 -336 val_336 12 -338 val_338 12 -339 val_339 12 -34 val_34 12 -341 val_341 12 -342 val_342 12 -342 val_342 12 -344 val_344 12 -344 val_344 12 -345 val_345 12 -348 val_348 12 -348 val_348 12 -348 val_348 12 -348 val_348 12 -348 val_348 12 -35 val_35 12 -35 val_35 12 -35 val_35 12 -351 val_351 12 -353 val_353 12 -353 val_353 12 -356 val_356 12 -360 val_360 12 -362 val_362 12 -364 val_364 12 -365 val_365 12 -366 val_366 12 -367 val_367 12 -367 val_367 12 -368 val_368 12 -369 
val_369 12 -369 val_369 12 -369 val_369 12 -37 val_37 12 -37 val_37 12 -373 val_373 12 -374 val_374 12 -375 val_375 12 -377 val_377 12 -378 val_378 12 -379 val_379 12 -382 val_382 12 -382 val_382 12 -384 val_384 12 -384 val_384 12 -384 val_384 12 -386 val_386 12 -389 val_389 12 -392 val_392 12 -393 val_393 12 -394 val_394 12 -395 val_395 12 -395 val_395 12 -396 val_396 12 -396 val_396 12 -396 val_396 12 -397 val_397 12 -397 val_397 12 -399 val_399 12 -399 val_399 12 -4 val_4 12 -400 val_400 12 -401 val_401 12 -401 val_401 12 -401 val_401 12 -401 val_401 12 -401 val_401 12 -402 val_402 12 -403 val_403 12 -403 val_403 12 -403 val_403 12 -404 val_404 12 -404 val_404 12 -406 val_406 12 -406 val_406 12 -406 val_406 12 -406 val_406 12 -407 val_407 12 -409 val_409 12 -409 val_409 12 -409 val_409 12 -41 val_41 12 -411 val_411 12 -413 val_413 12 -413 val_413 12 -414 val_414 12 -414 val_414 12 -417 val_417 12 -417 val_417 12 -417 val_417 12 -418 val_418 12 -419 val_419 12 -42 val_42 12 -42 val_42 12 -421 val_421 12 -424 val_424 12 -424 val_424 12 -427 val_427 12 -429 val_429 12 -429 val_429 12 -43 val_43 12 -430 val_430 12 -430 val_430 12 -430 val_430 12 -431 val_431 12 -431 val_431 12 -431 val_431 12 -432 val_432 12 -435 val_435 12 -436 val_436 12 -437 val_437 12 -438 val_438 12 -438 val_438 12 -438 val_438 12 -439 val_439 12 -439 val_439 12 -44 val_44 12 -443 val_443 12 -444 val_444 12 -446 val_446 12 -448 val_448 12 -449 val_449 12 -452 val_452 12 -453 val_453 12 -454 val_454 12 -454 val_454 12 -454 val_454 12 -455 val_455 12 -457 val_457 12 -458 val_458 12 -458 val_458 12 -459 val_459 12 -459 val_459 12 -460 val_460 12 -462 val_462 12 -462 val_462 12 -463 val_463 12 -463 val_463 12 -466 val_466 12 -466 val_466 12 -466 val_466 12 -467 val_467 12 -468 val_468 12 -468 val_468 12 -468 val_468 12 -468 val_468 12 -469 val_469 12 -469 val_469 12 -469 val_469 12 -469 val_469 12 -469 val_469 12 -47 val_47 12 -470 val_470 12 -472 val_472 12 -475 val_475 12 -477 val_477 12 -478 val_478 12 -478 val_478 12 -479 val_479 12 -480 val_480 12 -480 val_480 12 -480 val_480 12 -481 val_481 12 -482 val_482 12 -483 val_483 12 -484 val_484 12 -485 val_485 12 -487 val_487 12 -489 val_489 12 -489 val_489 12 -489 val_489 12 -489 val_489 12 -490 val_490 12 -491 val_491 12 -492 val_492 12 -492 val_492 12 -493 val_493 12 -494 val_494 12 -495 val_495 12 -496 val_496 12 -497 val_497 12 -498 val_498 12 -498 val_498 12 -498 val_498 12 -5 val_5 12 -5 val_5 12 -5 val_5 12 -51 val_51 12 -51 val_51 12 -53 val_53 12 -54 val_54 12 -57 val_57 12 -58 val_58 12 -58 val_58 12 -64 val_64 12 -65 val_65 12 -66 val_66 12 -67 val_67 12 -67 val_67 12 -69 val_69 12 -70 val_70 12 -70 val_70 12 -70 val_70 12 -72 val_72 12 -72 val_72 12 -74 val_74 12 -76 val_76 12 -76 val_76 12 -77 val_77 12 -78 val_78 12 -8 val_8 12 -80 val_80 12 -82 val_82 12 -83 val_83 12 -83 val_83 12 -84 val_84 12 -84 val_84 12 -85 val_85 12 -86 val_86 12 -87 val_87 12 -9 val_9 12 -90 val_90 12 -90 val_90 12 -90 val_90 12 -92 val_92 12 -95 val_95 12 -95 val_95 12 -96 val_96 12 -97 val_97 12 -97 val_97 12 -98 val_98 12 -98 val_98 12 -XrhdsR2p43qW9hpMcP/9nw== +1B2M2Y8AsgTpgAmY7PhCfg== PREHOOK: query: explain select key, value, hr from partint where hr between 11 and 12 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value, hr from partint where hr between 11 and 12 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator 
limit: -1 Processor Tree: - TableScan - alias: partint - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key, value, hr from partint where hr between 11 and 12 PREHOOK: type: QUERY PREHOOK: Input: default@partint -PREHOOK: Input: default@partint@ds=2008-04-08/hr=11 -PREHOOK: Input: default@partint@ds=2008-04-08/hr=12 #### A masked pattern was here #### POSTHOOK: query: select key, value, hr from partint where hr between 11 and 12 POSTHOOK: type: QUERY POSTHOOK: Input: default@partint -POSTHOOK: Input: default@partint@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@partint@ds=2008-04-08/hr=12 #### A masked pattern was here #### -0 val_0 11 -0 val_0 11 -0 val_0 11 -0 val_0 12 -0 val_0 12 -0 val_0 12 -10 val_10 11 -10 val_10 12 -100 val_100 11 -100 val_100 11 -100 val_100 12 -100 val_100 12 -103 val_103 11 -103 val_103 11 -103 val_103 12 -103 val_103 12 -104 val_104 11 -104 val_104 11 -104 val_104 12 -104 val_104 12 -105 val_105 11 -105 val_105 12 -11 val_11 11 -11 val_11 12 -111 val_111 11 -111 val_111 12 -113 val_113 11 -113 val_113 11 -113 val_113 12 -113 val_113 12 -114 val_114 11 -114 val_114 12 -116 val_116 11 -116 val_116 12 -118 val_118 11 -118 val_118 11 -118 val_118 12 -118 val_118 12 -119 val_119 11 -119 val_119 11 -119 val_119 11 -119 val_119 12 -119 val_119 12 -119 val_119 12 -12 val_12 11 -12 val_12 11 -12 val_12 12 -12 val_12 12 -120 val_120 11 -120 val_120 11 -120 val_120 12 -120 val_120 12 -125 val_125 11 -125 val_125 11 -125 val_125 12 -125 val_125 12 -126 val_126 11 -126 val_126 12 -128 val_128 11 -128 val_128 11 -128 val_128 11 -128 val_128 12 -128 val_128 12 -128 val_128 12 -129 val_129 11 -129 val_129 11 -129 val_129 12 -129 val_129 12 -131 val_131 11 -131 val_131 12 -133 val_133 11 -133 val_133 12 -134 val_134 11 -134 val_134 11 -134 val_134 12 -134 val_134 12 -136 val_136 11 -136 val_136 12 -137 val_137 11 -137 val_137 11 -137 val_137 12 -137 val_137 12 -138 val_138 11 -138 val_138 11 -138 val_138 11 -138 val_138 11 -138 val_138 12 -138 val_138 12 -138 val_138 12 -138 val_138 12 -143 val_143 11 -143 val_143 12 -145 val_145 11 -145 val_145 12 -146 val_146 11 -146 val_146 11 -146 val_146 12 -146 val_146 12 -149 val_149 11 -149 val_149 11 -149 val_149 12 -149 val_149 12 -15 val_15 11 -15 val_15 11 -15 val_15 12 -15 val_15 12 -150 val_150 11 -150 val_150 12 -152 val_152 11 -152 val_152 11 -152 val_152 12 -152 val_152 12 -153 val_153 11 -153 val_153 12 -155 val_155 11 -155 val_155 12 -156 val_156 11 -156 val_156 12 -157 val_157 11 -157 val_157 12 -158 val_158 11 -158 val_158 12 -160 val_160 11 -160 val_160 12 -162 val_162 11 -162 val_162 12 -163 val_163 11 -163 val_163 12 -164 val_164 11 -164 val_164 11 -164 val_164 12 -164 val_164 12 -165 val_165 11 -165 val_165 11 -165 val_165 12 -165 val_165 12 -166 val_166 11 -166 val_166 12 -167 val_167 11 -167 val_167 11 -167 val_167 11 -167 val_167 12 -167 val_167 12 -167 val_167 12 -168 val_168 11 -168 val_168 12 -169 val_169 11 -169 val_169 11 -169 val_169 11 -169 val_169 11 -169 val_169 12 -169 val_169 12 -169 val_169 12 -169 val_169 12 -17 val_17 11 -17 val_17 12 -170 val_170 11 -170 val_170 12 -172 val_172 11 -172 val_172 11 -172 val_172 12 -172 val_172 12 -174 val_174 11 -174 val_174 11 -174 val_174 12 -174 val_174 12 -175 val_175 11 -175 val_175 
11 -175 val_175 12 -175 val_175 12 -176 val_176 11 -176 val_176 11 -176 val_176 12 -176 val_176 12 -177 val_177 11 -177 val_177 12 -178 val_178 11 -178 val_178 12 -179 val_179 11 -179 val_179 11 -179 val_179 12 -179 val_179 12 -18 val_18 11 -18 val_18 11 -18 val_18 12 -18 val_18 12 -180 val_180 11 -180 val_180 12 -181 val_181 11 -181 val_181 12 -183 val_183 11 -183 val_183 12 -186 val_186 11 -186 val_186 12 -187 val_187 11 -187 val_187 11 -187 val_187 11 -187 val_187 12 -187 val_187 12 -187 val_187 12 -189 val_189 11 -189 val_189 12 -19 val_19 11 -19 val_19 12 -190 val_190 11 -190 val_190 12 -191 val_191 11 -191 val_191 11 -191 val_191 12 -191 val_191 12 -192 val_192 11 -192 val_192 12 -193 val_193 11 -193 val_193 11 -193 val_193 11 -193 val_193 12 -193 val_193 12 -193 val_193 12 -194 val_194 11 -194 val_194 12 -195 val_195 11 -195 val_195 11 -195 val_195 12 -195 val_195 12 -196 val_196 11 -196 val_196 12 -197 val_197 11 -197 val_197 11 -197 val_197 12 -197 val_197 12 -199 val_199 11 -199 val_199 11 -199 val_199 11 -199 val_199 12 -199 val_199 12 -199 val_199 12 -2 val_2 11 -2 val_2 12 -20 val_20 11 -20 val_20 12 -200 val_200 11 -200 val_200 11 -200 val_200 12 -200 val_200 12 -201 val_201 11 -201 val_201 12 -202 val_202 11 -202 val_202 12 -203 val_203 11 -203 val_203 11 -203 val_203 12 -203 val_203 12 -205 val_205 11 -205 val_205 11 -205 val_205 12 -205 val_205 12 -207 val_207 11 -207 val_207 11 -207 val_207 12 -207 val_207 12 -208 val_208 11 -208 val_208 11 -208 val_208 11 -208 val_208 12 -208 val_208 12 -208 val_208 12 -209 val_209 11 -209 val_209 11 -209 val_209 12 -209 val_209 12 -213 val_213 11 -213 val_213 11 -213 val_213 12 -213 val_213 12 -214 val_214 11 -214 val_214 12 -216 val_216 11 -216 val_216 11 -216 val_216 12 -216 val_216 12 -217 val_217 11 -217 val_217 11 -217 val_217 12 -217 val_217 12 -218 val_218 11 -218 val_218 12 -219 val_219 11 -219 val_219 11 -219 val_219 12 -219 val_219 12 -221 val_221 11 -221 val_221 11 -221 val_221 12 -221 val_221 12 -222 val_222 11 -222 val_222 12 -223 val_223 11 -223 val_223 11 -223 val_223 12 -223 val_223 12 -224 val_224 11 -224 val_224 11 -224 val_224 12 -224 val_224 12 -226 val_226 11 -226 val_226 12 -228 val_228 11 -228 val_228 12 -229 val_229 11 -229 val_229 11 -229 val_229 12 -229 val_229 12 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 12 -230 val_230 12 -230 val_230 12 -230 val_230 12 -230 val_230 12 -233 val_233 11 -233 val_233 11 -233 val_233 12 -233 val_233 12 -235 val_235 11 -235 val_235 12 -237 val_237 11 -237 val_237 11 -237 val_237 12 -237 val_237 12 -238 val_238 11 -238 val_238 11 -238 val_238 12 -238 val_238 12 -239 val_239 11 -239 val_239 11 -239 val_239 12 -239 val_239 12 -24 val_24 11 -24 val_24 11 -24 val_24 12 -24 val_24 12 -241 val_241 11 -241 val_241 12 -242 val_242 11 -242 val_242 11 -242 val_242 12 -242 val_242 12 -244 val_244 11 -244 val_244 12 -247 val_247 11 -247 val_247 12 -248 val_248 11 -248 val_248 12 -249 val_249 11 -249 val_249 12 -252 val_252 11 -252 val_252 12 -255 val_255 11 -255 val_255 11 -255 val_255 12 -255 val_255 12 -256 val_256 11 -256 val_256 11 -256 val_256 12 -256 val_256 12 -257 val_257 11 -257 val_257 12 -258 val_258 11 -258 val_258 12 -26 val_26 11 -26 val_26 11 -26 val_26 12 -26 val_26 12 -260 val_260 11 -260 val_260 12 -262 val_262 11 -262 val_262 12 -263 val_263 11 -263 val_263 12 -265 val_265 11 -265 val_265 11 -265 val_265 12 -265 val_265 12 -266 val_266 11 -266 val_266 12 -27 val_27 11 -27 val_27 12 -272 val_272 11 -272 val_272 11 -272 
val_272 12 -272 val_272 12 -273 val_273 11 -273 val_273 11 -273 val_273 11 -273 val_273 12 -273 val_273 12 -273 val_273 12 -274 val_274 11 -274 val_274 12 -275 val_275 11 -275 val_275 12 -277 val_277 11 -277 val_277 11 -277 val_277 11 -277 val_277 11 -277 val_277 12 -277 val_277 12 -277 val_277 12 -277 val_277 12 -278 val_278 11 -278 val_278 11 -278 val_278 12 -278 val_278 12 -28 val_28 11 -28 val_28 12 -280 val_280 11 -280 val_280 11 -280 val_280 12 -280 val_280 12 -281 val_281 11 -281 val_281 11 -281 val_281 12 -281 val_281 12 -282 val_282 11 -282 val_282 11 -282 val_282 12 -282 val_282 12 -283 val_283 11 -283 val_283 12 -284 val_284 11 -284 val_284 12 -285 val_285 11 -285 val_285 12 -286 val_286 11 -286 val_286 12 -287 val_287 11 -287 val_287 12 -288 val_288 11 -288 val_288 11 -288 val_288 12 -288 val_288 12 -289 val_289 11 -289 val_289 12 -291 val_291 11 -291 val_291 12 -292 val_292 11 -292 val_292 12 -296 val_296 11 -296 val_296 12 -298 val_298 11 -298 val_298 11 -298 val_298 11 -298 val_298 12 -298 val_298 12 -298 val_298 12 -30 val_30 11 -30 val_30 12 -302 val_302 11 -302 val_302 12 -305 val_305 11 -305 val_305 12 -306 val_306 11 -306 val_306 12 -307 val_307 11 -307 val_307 11 -307 val_307 12 -307 val_307 12 -308 val_308 11 -308 val_308 12 -309 val_309 11 -309 val_309 11 -309 val_309 12 -309 val_309 12 -310 val_310 11 -310 val_310 12 -311 val_311 11 -311 val_311 11 -311 val_311 11 -311 val_311 12 -311 val_311 12 -311 val_311 12 -315 val_315 11 -315 val_315 12 -316 val_316 11 -316 val_316 11 -316 val_316 11 -316 val_316 12 -316 val_316 12 -316 val_316 12 -317 val_317 11 -317 val_317 11 -317 val_317 12 -317 val_317 12 -318 val_318 11 -318 val_318 11 -318 val_318 11 -318 val_318 12 -318 val_318 12 -318 val_318 12 -321 val_321 11 -321 val_321 11 -321 val_321 12 -321 val_321 12 -322 val_322 11 -322 val_322 11 -322 val_322 12 -322 val_322 12 -323 val_323 11 -323 val_323 12 -325 val_325 11 -325 val_325 11 -325 val_325 12 -325 val_325 12 -327 val_327 11 -327 val_327 11 -327 val_327 11 -327 val_327 12 -327 val_327 12 -327 val_327 12 -33 val_33 11 -33 val_33 12 -331 val_331 11 -331 val_331 11 -331 val_331 12 -331 val_331 12 -332 val_332 11 -332 val_332 12 -333 val_333 11 -333 val_333 11 -333 val_333 12 -333 val_333 12 -335 val_335 11 -335 val_335 12 -336 val_336 11 -336 val_336 12 -338 val_338 11 -338 val_338 12 -339 val_339 11 -339 val_339 12 -34 val_34 11 -34 val_34 12 -341 val_341 11 -341 val_341 12 -342 val_342 11 -342 val_342 11 -342 val_342 12 -342 val_342 12 -344 val_344 11 -344 val_344 11 -344 val_344 12 -344 val_344 12 -345 val_345 11 -345 val_345 12 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 12 -348 val_348 12 -348 val_348 12 -348 val_348 12 -348 val_348 12 -35 val_35 11 -35 val_35 11 -35 val_35 11 -35 val_35 12 -35 val_35 12 -35 val_35 12 -351 val_351 11 -351 val_351 12 -353 val_353 11 -353 val_353 11 -353 val_353 12 -353 val_353 12 -356 val_356 11 -356 val_356 12 -360 val_360 11 -360 val_360 12 -362 val_362 11 -362 val_362 12 -364 val_364 11 -364 val_364 12 -365 val_365 11 -365 val_365 12 -366 val_366 11 -366 val_366 12 -367 val_367 11 -367 val_367 11 -367 val_367 12 -367 val_367 12 -368 val_368 11 -368 val_368 12 -369 val_369 11 -369 val_369 11 -369 val_369 11 -369 val_369 12 -369 val_369 12 -369 val_369 12 -37 val_37 11 -37 val_37 11 -37 val_37 12 -37 val_37 12 -373 val_373 11 -373 val_373 12 -374 val_374 11 -374 val_374 12 -375 val_375 11 -375 val_375 12 -377 val_377 11 -377 val_377 12 -378 val_378 11 -378 val_378 12 -379 
val_379 11 -379 val_379 12 -382 val_382 11 -382 val_382 11 -382 val_382 12 -382 val_382 12 -384 val_384 11 -384 val_384 11 -384 val_384 11 -384 val_384 12 -384 val_384 12 -384 val_384 12 -386 val_386 11 -386 val_386 12 -389 val_389 11 -389 val_389 12 -392 val_392 11 -392 val_392 12 -393 val_393 11 -393 val_393 12 -394 val_394 11 -394 val_394 12 -395 val_395 11 -395 val_395 11 -395 val_395 12 -395 val_395 12 -396 val_396 11 -396 val_396 11 -396 val_396 11 -396 val_396 12 -396 val_396 12 -396 val_396 12 -397 val_397 11 -397 val_397 11 -397 val_397 12 -397 val_397 12 -399 val_399 11 -399 val_399 11 -399 val_399 12 -399 val_399 12 -4 val_4 11 -4 val_4 12 -400 val_400 11 -400 val_400 12 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 12 -401 val_401 12 -401 val_401 12 -401 val_401 12 -401 val_401 12 -402 val_402 11 -402 val_402 12 -403 val_403 11 -403 val_403 11 -403 val_403 11 -403 val_403 12 -403 val_403 12 -403 val_403 12 -404 val_404 11 -404 val_404 11 -404 val_404 12 -404 val_404 12 -406 val_406 11 -406 val_406 11 -406 val_406 11 -406 val_406 11 -406 val_406 12 -406 val_406 12 -406 val_406 12 -406 val_406 12 -407 val_407 11 -407 val_407 12 -409 val_409 11 -409 val_409 11 -409 val_409 11 -409 val_409 12 -409 val_409 12 -409 val_409 12 -41 val_41 11 -41 val_41 12 -411 val_411 11 -411 val_411 12 -413 val_413 11 -413 val_413 11 -413 val_413 12 -413 val_413 12 -414 val_414 11 -414 val_414 11 -414 val_414 12 -414 val_414 12 -417 val_417 11 -417 val_417 11 -417 val_417 11 -417 val_417 12 -417 val_417 12 -417 val_417 12 -418 val_418 11 -418 val_418 12 -419 val_419 11 -419 val_419 12 -42 val_42 11 -42 val_42 11 -42 val_42 12 -42 val_42 12 -421 val_421 11 -421 val_421 12 -424 val_424 11 -424 val_424 11 -424 val_424 12 -424 val_424 12 -427 val_427 11 -427 val_427 12 -429 val_429 11 -429 val_429 11 -429 val_429 12 -429 val_429 12 -43 val_43 11 -43 val_43 12 -430 val_430 11 -430 val_430 11 -430 val_430 11 -430 val_430 12 -430 val_430 12 -430 val_430 12 -431 val_431 11 -431 val_431 11 -431 val_431 11 -431 val_431 12 -431 val_431 12 -431 val_431 12 -432 val_432 11 -432 val_432 12 -435 val_435 11 -435 val_435 12 -436 val_436 11 -436 val_436 12 -437 val_437 11 -437 val_437 12 -438 val_438 11 -438 val_438 11 -438 val_438 11 -438 val_438 12 -438 val_438 12 -438 val_438 12 -439 val_439 11 -439 val_439 11 -439 val_439 12 -439 val_439 12 -44 val_44 11 -44 val_44 12 -443 val_443 11 -443 val_443 12 -444 val_444 11 -444 val_444 12 -446 val_446 11 -446 val_446 12 -448 val_448 11 -448 val_448 12 -449 val_449 11 -449 val_449 12 -452 val_452 11 -452 val_452 12 -453 val_453 11 -453 val_453 12 -454 val_454 11 -454 val_454 11 -454 val_454 11 -454 val_454 12 -454 val_454 12 -454 val_454 12 -455 val_455 11 -455 val_455 12 -457 val_457 11 -457 val_457 12 -458 val_458 11 -458 val_458 11 -458 val_458 12 -458 val_458 12 -459 val_459 11 -459 val_459 11 -459 val_459 12 -459 val_459 12 -460 val_460 11 -460 val_460 12 -462 val_462 11 -462 val_462 11 -462 val_462 12 -462 val_462 12 -463 val_463 11 -463 val_463 11 -463 val_463 12 -463 val_463 12 -466 val_466 11 -466 val_466 11 -466 val_466 11 -466 val_466 12 -466 val_466 12 -466 val_466 12 -467 val_467 11 -467 val_467 12 -468 val_468 11 -468 val_468 11 -468 val_468 11 -468 val_468 11 -468 val_468 12 -468 val_468 12 -468 val_468 12 -468 val_468 12 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 12 -469 val_469 12 -469 val_469 12 -469 val_469 12 -469 val_469 12 -47 val_47 11 -47 val_47 12 -470 
val_470 11 -470 val_470 12 -472 val_472 11 -472 val_472 12 -475 val_475 11 -475 val_475 12 -477 val_477 11 -477 val_477 12 -478 val_478 11 -478 val_478 11 -478 val_478 12 -478 val_478 12 -479 val_479 11 -479 val_479 12 -480 val_480 11 -480 val_480 11 -480 val_480 11 -480 val_480 12 -480 val_480 12 -480 val_480 12 -481 val_481 11 -481 val_481 12 -482 val_482 11 -482 val_482 12 -483 val_483 11 -483 val_483 12 -484 val_484 11 -484 val_484 12 -485 val_485 11 -485 val_485 12 -487 val_487 11 -487 val_487 12 -489 val_489 11 -489 val_489 11 -489 val_489 11 -489 val_489 11 -489 val_489 12 -489 val_489 12 -489 val_489 12 -489 val_489 12 -490 val_490 11 -490 val_490 12 -491 val_491 11 -491 val_491 12 -492 val_492 11 -492 val_492 11 -492 val_492 12 -492 val_492 12 -493 val_493 11 -493 val_493 12 -494 val_494 11 -494 val_494 12 -495 val_495 11 -495 val_495 12 -496 val_496 11 -496 val_496 12 -497 val_497 11 -497 val_497 12 -498 val_498 11 -498 val_498 11 -498 val_498 11 -498 val_498 12 -498 val_498 12 -498 val_498 12 -5 val_5 11 -5 val_5 11 -5 val_5 11 -5 val_5 12 -5 val_5 12 -5 val_5 12 -51 val_51 11 -51 val_51 11 -51 val_51 12 -51 val_51 12 -53 val_53 11 -53 val_53 12 -54 val_54 11 -54 val_54 12 -57 val_57 11 -57 val_57 12 -58 val_58 11 -58 val_58 11 -58 val_58 12 -58 val_58 12 -64 val_64 11 -64 val_64 12 -65 val_65 11 -65 val_65 12 -66 val_66 11 -66 val_66 12 -67 val_67 11 -67 val_67 11 -67 val_67 12 -67 val_67 12 -69 val_69 11 -69 val_69 12 -70 val_70 11 -70 val_70 11 -70 val_70 11 -70 val_70 12 -70 val_70 12 -70 val_70 12 -72 val_72 11 -72 val_72 11 -72 val_72 12 -72 val_72 12 -74 val_74 11 -74 val_74 12 -76 val_76 11 -76 val_76 11 -76 val_76 12 -76 val_76 12 -77 val_77 11 -77 val_77 12 -78 val_78 11 -78 val_78 12 -8 val_8 11 -8 val_8 12 -80 val_80 11 -80 val_80 12 -82 val_82 11 -82 val_82 12 -83 val_83 11 -83 val_83 11 -83 val_83 12 -83 val_83 12 -84 val_84 11 -84 val_84 11 -84 val_84 12 -84 val_84 12 -85 val_85 11 -85 val_85 12 -86 val_86 11 -86 val_86 12 -87 val_87 11 -87 val_87 12 -9 val_9 11 -9 val_9 12 -90 val_90 11 -90 val_90 11 -90 val_90 11 -90 val_90 12 -90 val_90 12 -90 val_90 12 -92 val_92 11 -92 val_92 12 -95 val_95 11 -95 val_95 11 -95 val_95 12 -95 val_95 12 -96 val_96 11 -96 val_96 12 -97 val_97 11 -97 val_97 11 -97 val_97 12 -97 val_97 12 -98 val_98 11 -98 val_98 11 -98 val_98 12 -98 val_98 12 -irG2+wNa4ZWkUKb0+hXweg== +1B2M2Y8AsgTpgAmY7PhCfg== PREHOOK: query: explain select key, value, hr from partint where hr not between 12 and 14 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value, hr from partint where hr not between 12 and 14 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: partint - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key, value, hr from partint where hr not between 12 and 14 PREHOOK: type: QUERY PREHOOK: Input: default@partint -PREHOOK: Input: default@partint@ds=2008-04-08/hr=11 #### A masked pattern was here #### POSTHOOK: query: select key, value, hr from partint where hr not between 12 and 14 POSTHOOK: type: QUERY POSTHOOK: Input: default@partint -POSTHOOK: Input: 
default@partint@ds=2008-04-08/hr=11 #### A masked pattern was here #### -0 val_0 11 -0 val_0 11 -0 val_0 11 -10 val_10 11 -100 val_100 11 -100 val_100 11 -103 val_103 11 -103 val_103 11 -104 val_104 11 -104 val_104 11 -105 val_105 11 -11 val_11 11 -111 val_111 11 -113 val_113 11 -113 val_113 11 -114 val_114 11 -116 val_116 11 -118 val_118 11 -118 val_118 11 -119 val_119 11 -119 val_119 11 -119 val_119 11 -12 val_12 11 -12 val_12 11 -120 val_120 11 -120 val_120 11 -125 val_125 11 -125 val_125 11 -126 val_126 11 -128 val_128 11 -128 val_128 11 -128 val_128 11 -129 val_129 11 -129 val_129 11 -131 val_131 11 -133 val_133 11 -134 val_134 11 -134 val_134 11 -136 val_136 11 -137 val_137 11 -137 val_137 11 -138 val_138 11 -138 val_138 11 -138 val_138 11 -138 val_138 11 -143 val_143 11 -145 val_145 11 -146 val_146 11 -146 val_146 11 -149 val_149 11 -149 val_149 11 -15 val_15 11 -15 val_15 11 -150 val_150 11 -152 val_152 11 -152 val_152 11 -153 val_153 11 -155 val_155 11 -156 val_156 11 -157 val_157 11 -158 val_158 11 -160 val_160 11 -162 val_162 11 -163 val_163 11 -164 val_164 11 -164 val_164 11 -165 val_165 11 -165 val_165 11 -166 val_166 11 -167 val_167 11 -167 val_167 11 -167 val_167 11 -168 val_168 11 -169 val_169 11 -169 val_169 11 -169 val_169 11 -169 val_169 11 -17 val_17 11 -170 val_170 11 -172 val_172 11 -172 val_172 11 -174 val_174 11 -174 val_174 11 -175 val_175 11 -175 val_175 11 -176 val_176 11 -176 val_176 11 -177 val_177 11 -178 val_178 11 -179 val_179 11 -179 val_179 11 -18 val_18 11 -18 val_18 11 -180 val_180 11 -181 val_181 11 -183 val_183 11 -186 val_186 11 -187 val_187 11 -187 val_187 11 -187 val_187 11 -189 val_189 11 -19 val_19 11 -190 val_190 11 -191 val_191 11 -191 val_191 11 -192 val_192 11 -193 val_193 11 -193 val_193 11 -193 val_193 11 -194 val_194 11 -195 val_195 11 -195 val_195 11 -196 val_196 11 -197 val_197 11 -197 val_197 11 -199 val_199 11 -199 val_199 11 -199 val_199 11 -2 val_2 11 -20 val_20 11 -200 val_200 11 -200 val_200 11 -201 val_201 11 -202 val_202 11 -203 val_203 11 -203 val_203 11 -205 val_205 11 -205 val_205 11 -207 val_207 11 -207 val_207 11 -208 val_208 11 -208 val_208 11 -208 val_208 11 -209 val_209 11 -209 val_209 11 -213 val_213 11 -213 val_213 11 -214 val_214 11 -216 val_216 11 -216 val_216 11 -217 val_217 11 -217 val_217 11 -218 val_218 11 -219 val_219 11 -219 val_219 11 -221 val_221 11 -221 val_221 11 -222 val_222 11 -223 val_223 11 -223 val_223 11 -224 val_224 11 -224 val_224 11 -226 val_226 11 -228 val_228 11 -229 val_229 11 -229 val_229 11 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 11 -233 val_233 11 -233 val_233 11 -235 val_235 11 -237 val_237 11 -237 val_237 11 -238 val_238 11 -238 val_238 11 -239 val_239 11 -239 val_239 11 -24 val_24 11 -24 val_24 11 -241 val_241 11 -242 val_242 11 -242 val_242 11 -244 val_244 11 -247 val_247 11 -248 val_248 11 -249 val_249 11 -252 val_252 11 -255 val_255 11 -255 val_255 11 -256 val_256 11 -256 val_256 11 -257 val_257 11 -258 val_258 11 -26 val_26 11 -26 val_26 11 -260 val_260 11 -262 val_262 11 -263 val_263 11 -265 val_265 11 -265 val_265 11 -266 val_266 11 -27 val_27 11 -272 val_272 11 -272 val_272 11 -273 val_273 11 -273 val_273 11 -273 val_273 11 -274 val_274 11 -275 val_275 11 -277 val_277 11 -277 val_277 11 -277 val_277 11 -277 val_277 11 -278 val_278 11 -278 val_278 11 -28 val_28 11 -280 val_280 11 -280 val_280 11 -281 val_281 11 -281 val_281 11 -282 val_282 11 -282 val_282 11 -283 val_283 11 -284 val_284 11 -285 val_285 11 -286 val_286 11 -287 val_287 11 -288 val_288 
11 -288 val_288 11 -289 val_289 11 -291 val_291 11 -292 val_292 11 -296 val_296 11 -298 val_298 11 -298 val_298 11 -298 val_298 11 -30 val_30 11 -302 val_302 11 -305 val_305 11 -306 val_306 11 -307 val_307 11 -307 val_307 11 -308 val_308 11 -309 val_309 11 -309 val_309 11 -310 val_310 11 -311 val_311 11 -311 val_311 11 -311 val_311 11 -315 val_315 11 -316 val_316 11 -316 val_316 11 -316 val_316 11 -317 val_317 11 -317 val_317 11 -318 val_318 11 -318 val_318 11 -318 val_318 11 -321 val_321 11 -321 val_321 11 -322 val_322 11 -322 val_322 11 -323 val_323 11 -325 val_325 11 -325 val_325 11 -327 val_327 11 -327 val_327 11 -327 val_327 11 -33 val_33 11 -331 val_331 11 -331 val_331 11 -332 val_332 11 -333 val_333 11 -333 val_333 11 -335 val_335 11 -336 val_336 11 -338 val_338 11 -339 val_339 11 -34 val_34 11 -341 val_341 11 -342 val_342 11 -342 val_342 11 -344 val_344 11 -344 val_344 11 -345 val_345 11 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 11 -35 val_35 11 -35 val_35 11 -35 val_35 11 -351 val_351 11 -353 val_353 11 -353 val_353 11 -356 val_356 11 -360 val_360 11 -362 val_362 11 -364 val_364 11 -365 val_365 11 -366 val_366 11 -367 val_367 11 -367 val_367 11 -368 val_368 11 -369 val_369 11 -369 val_369 11 -369 val_369 11 -37 val_37 11 -37 val_37 11 -373 val_373 11 -374 val_374 11 -375 val_375 11 -377 val_377 11 -378 val_378 11 -379 val_379 11 -382 val_382 11 -382 val_382 11 -384 val_384 11 -384 val_384 11 -384 val_384 11 -386 val_386 11 -389 val_389 11 -392 val_392 11 -393 val_393 11 -394 val_394 11 -395 val_395 11 -395 val_395 11 -396 val_396 11 -396 val_396 11 -396 val_396 11 -397 val_397 11 -397 val_397 11 -399 val_399 11 -399 val_399 11 -4 val_4 11 -400 val_400 11 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 11 -402 val_402 11 -403 val_403 11 -403 val_403 11 -403 val_403 11 -404 val_404 11 -404 val_404 11 -406 val_406 11 -406 val_406 11 -406 val_406 11 -406 val_406 11 -407 val_407 11 -409 val_409 11 -409 val_409 11 -409 val_409 11 -41 val_41 11 -411 val_411 11 -413 val_413 11 -413 val_413 11 -414 val_414 11 -414 val_414 11 -417 val_417 11 -417 val_417 11 -417 val_417 11 -418 val_418 11 -419 val_419 11 -42 val_42 11 -42 val_42 11 -421 val_421 11 -424 val_424 11 -424 val_424 11 -427 val_427 11 -429 val_429 11 -429 val_429 11 -43 val_43 11 -430 val_430 11 -430 val_430 11 -430 val_430 11 -431 val_431 11 -431 val_431 11 -431 val_431 11 -432 val_432 11 -435 val_435 11 -436 val_436 11 -437 val_437 11 -438 val_438 11 -438 val_438 11 -438 val_438 11 -439 val_439 11 -439 val_439 11 -44 val_44 11 -443 val_443 11 -444 val_444 11 -446 val_446 11 -448 val_448 11 -449 val_449 11 -452 val_452 11 -453 val_453 11 -454 val_454 11 -454 val_454 11 -454 val_454 11 -455 val_455 11 -457 val_457 11 -458 val_458 11 -458 val_458 11 -459 val_459 11 -459 val_459 11 -460 val_460 11 -462 val_462 11 -462 val_462 11 -463 val_463 11 -463 val_463 11 -466 val_466 11 -466 val_466 11 -466 val_466 11 -467 val_467 11 -468 val_468 11 -468 val_468 11 -468 val_468 11 -468 val_468 11 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 11 -47 val_47 11 -470 val_470 11 -472 val_472 11 -475 val_475 11 -477 val_477 11 -478 val_478 11 -478 val_478 11 -479 val_479 11 -480 val_480 11 -480 val_480 11 -480 val_480 11 -481 val_481 11 -482 val_482 11 -483 val_483 11 -484 val_484 11 -485 val_485 11 -487 val_487 11 -489 val_489 11 -489 val_489 11 -489 val_489 11 -489 val_489 11 -490 val_490 11 -491 val_491 11 -492 val_492 11 -492 val_492 11 -493 val_493 11 
-494 val_494 11 -495 val_495 11 -496 val_496 11 -497 val_497 11 -498 val_498 11 -498 val_498 11 -498 val_498 11 -5 val_5 11 -5 val_5 11 -5 val_5 11 -51 val_51 11 -51 val_51 11 -53 val_53 11 -54 val_54 11 -57 val_57 11 -58 val_58 11 -58 val_58 11 -64 val_64 11 -65 val_65 11 -66 val_66 11 -67 val_67 11 -67 val_67 11 -69 val_69 11 -70 val_70 11 -70 val_70 11 -70 val_70 11 -72 val_72 11 -72 val_72 11 -74 val_74 11 -76 val_76 11 -76 val_76 11 -77 val_77 11 -78 val_78 11 -8 val_8 11 -80 val_80 11 -82 val_82 11 -83 val_83 11 -83 val_83 11 -84 val_84 11 -84 val_84 11 -85 val_85 11 -86 val_86 11 -87 val_87 11 -9 val_9 11 -90 val_90 11 -90 val_90 11 -90 val_90 11 -92 val_92 11 -95 val_95 11 -95 val_95 11 -96 val_96 11 -97 val_97 11 -97 val_97 11 -98 val_98 11 -98 val_98 11 -v6vY2XVDyLw+4d5w+xCycg== +1B2M2Y8AsgTpgAmY7PhCfg== PREHOOK: query: explain select key, value, hr from partint where hr < 13 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value, hr from partint where hr < 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: partint - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key, value, hr from partint where hr < 13 PREHOOK: type: QUERY PREHOOK: Input: default@partint -PREHOOK: Input: default@partint@ds=2008-04-08/hr=11 -PREHOOK: Input: default@partint@ds=2008-04-08/hr=12 #### A masked pattern was here #### POSTHOOK: query: select key, value, hr from partint where hr < 13 POSTHOOK: type: QUERY POSTHOOK: Input: default@partint -POSTHOOK: Input: default@partint@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@partint@ds=2008-04-08/hr=12 #### A masked pattern was here #### -0 val_0 11 -0 val_0 11 -0 val_0 11 -0 val_0 12 -0 val_0 12 -0 val_0 12 -10 val_10 11 -10 val_10 12 -100 val_100 11 -100 val_100 11 -100 val_100 12 -100 val_100 12 -103 val_103 11 -103 val_103 11 -103 val_103 12 -103 val_103 12 -104 val_104 11 -104 val_104 11 -104 val_104 12 -104 val_104 12 -105 val_105 11 -105 val_105 12 -11 val_11 11 -11 val_11 12 -111 val_111 11 -111 val_111 12 -113 val_113 11 -113 val_113 11 -113 val_113 12 -113 val_113 12 -114 val_114 11 -114 val_114 12 -116 val_116 11 -116 val_116 12 -118 val_118 11 -118 val_118 11 -118 val_118 12 -118 val_118 12 -119 val_119 11 -119 val_119 11 -119 val_119 11 -119 val_119 12 -119 val_119 12 -119 val_119 12 -12 val_12 11 -12 val_12 11 -12 val_12 12 -12 val_12 12 -120 val_120 11 -120 val_120 11 -120 val_120 12 -120 val_120 12 -125 val_125 11 -125 val_125 11 -125 val_125 12 -125 val_125 12 -126 val_126 11 -126 val_126 12 -128 val_128 11 -128 val_128 11 -128 val_128 11 -128 val_128 12 -128 val_128 12 -128 val_128 12 -129 val_129 11 -129 val_129 11 -129 val_129 12 -129 val_129 12 -131 val_131 11 -131 val_131 12 -133 val_133 11 -133 val_133 12 -134 val_134 11 -134 val_134 11 -134 val_134 12 -134 val_134 12 -136 val_136 11 -136 val_136 12 -137 val_137 11 -137 val_137 11 -137 val_137 12 -137 val_137 12 -138 val_138 11 -138 val_138 11 -138 val_138 11 -138 val_138 11 -138 val_138 12 -138 val_138 12 -138 val_138 12 -138 val_138 12 -143 val_143 11 -143 val_143 12 -145 val_145 11 -145 val_145 
12 -146 val_146 11 -146 val_146 11 -146 val_146 12 -146 val_146 12 -149 val_149 11 -149 val_149 11 -149 val_149 12 -149 val_149 12 -15 val_15 11 -15 val_15 11 -15 val_15 12 -15 val_15 12 -150 val_150 11 -150 val_150 12 -152 val_152 11 -152 val_152 11 -152 val_152 12 -152 val_152 12 -153 val_153 11 -153 val_153 12 -155 val_155 11 -155 val_155 12 -156 val_156 11 -156 val_156 12 -157 val_157 11 -157 val_157 12 -158 val_158 11 -158 val_158 12 -160 val_160 11 -160 val_160 12 -162 val_162 11 -162 val_162 12 -163 val_163 11 -163 val_163 12 -164 val_164 11 -164 val_164 11 -164 val_164 12 -164 val_164 12 -165 val_165 11 -165 val_165 11 -165 val_165 12 -165 val_165 12 -166 val_166 11 -166 val_166 12 -167 val_167 11 -167 val_167 11 -167 val_167 11 -167 val_167 12 -167 val_167 12 -167 val_167 12 -168 val_168 11 -168 val_168 12 -169 val_169 11 -169 val_169 11 -169 val_169 11 -169 val_169 11 -169 val_169 12 -169 val_169 12 -169 val_169 12 -169 val_169 12 -17 val_17 11 -17 val_17 12 -170 val_170 11 -170 val_170 12 -172 val_172 11 -172 val_172 11 -172 val_172 12 -172 val_172 12 -174 val_174 11 -174 val_174 11 -174 val_174 12 -174 val_174 12 -175 val_175 11 -175 val_175 11 -175 val_175 12 -175 val_175 12 -176 val_176 11 -176 val_176 11 -176 val_176 12 -176 val_176 12 -177 val_177 11 -177 val_177 12 -178 val_178 11 -178 val_178 12 -179 val_179 11 -179 val_179 11 -179 val_179 12 -179 val_179 12 -18 val_18 11 -18 val_18 11 -18 val_18 12 -18 val_18 12 -180 val_180 11 -180 val_180 12 -181 val_181 11 -181 val_181 12 -183 val_183 11 -183 val_183 12 -186 val_186 11 -186 val_186 12 -187 val_187 11 -187 val_187 11 -187 val_187 11 -187 val_187 12 -187 val_187 12 -187 val_187 12 -189 val_189 11 -189 val_189 12 -19 val_19 11 -19 val_19 12 -190 val_190 11 -190 val_190 12 -191 val_191 11 -191 val_191 11 -191 val_191 12 -191 val_191 12 -192 val_192 11 -192 val_192 12 -193 val_193 11 -193 val_193 11 -193 val_193 11 -193 val_193 12 -193 val_193 12 -193 val_193 12 -194 val_194 11 -194 val_194 12 -195 val_195 11 -195 val_195 11 -195 val_195 12 -195 val_195 12 -196 val_196 11 -196 val_196 12 -197 val_197 11 -197 val_197 11 -197 val_197 12 -197 val_197 12 -199 val_199 11 -199 val_199 11 -199 val_199 11 -199 val_199 12 -199 val_199 12 -199 val_199 12 -2 val_2 11 -2 val_2 12 -20 val_20 11 -20 val_20 12 -200 val_200 11 -200 val_200 11 -200 val_200 12 -200 val_200 12 -201 val_201 11 -201 val_201 12 -202 val_202 11 -202 val_202 12 -203 val_203 11 -203 val_203 11 -203 val_203 12 -203 val_203 12 -205 val_205 11 -205 val_205 11 -205 val_205 12 -205 val_205 12 -207 val_207 11 -207 val_207 11 -207 val_207 12 -207 val_207 12 -208 val_208 11 -208 val_208 11 -208 val_208 11 -208 val_208 12 -208 val_208 12 -208 val_208 12 -209 val_209 11 -209 val_209 11 -209 val_209 12 -209 val_209 12 -213 val_213 11 -213 val_213 11 -213 val_213 12 -213 val_213 12 -214 val_214 11 -214 val_214 12 -216 val_216 11 -216 val_216 11 -216 val_216 12 -216 val_216 12 -217 val_217 11 -217 val_217 11 -217 val_217 12 -217 val_217 12 -218 val_218 11 -218 val_218 12 -219 val_219 11 -219 val_219 11 -219 val_219 12 -219 val_219 12 -221 val_221 11 -221 val_221 11 -221 val_221 12 -221 val_221 12 -222 val_222 11 -222 val_222 12 -223 val_223 11 -223 val_223 11 -223 val_223 12 -223 val_223 12 -224 val_224 11 -224 val_224 11 -224 val_224 12 -224 val_224 12 -226 val_226 11 -226 val_226 12 -228 val_228 11 -228 val_228 12 -229 val_229 11 -229 val_229 11 -229 val_229 12 -229 val_229 12 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 11 -230 val_230 12 
-230 val_230 12 -230 val_230 12 -230 val_230 12 -230 val_230 12 -233 val_233 11 -233 val_233 11 -233 val_233 12 -233 val_233 12 -235 val_235 11 -235 val_235 12 -237 val_237 11 -237 val_237 11 -237 val_237 12 -237 val_237 12 -238 val_238 11 -238 val_238 11 -238 val_238 12 -238 val_238 12 -239 val_239 11 -239 val_239 11 -239 val_239 12 -239 val_239 12 -24 val_24 11 -24 val_24 11 -24 val_24 12 -24 val_24 12 -241 val_241 11 -241 val_241 12 -242 val_242 11 -242 val_242 11 -242 val_242 12 -242 val_242 12 -244 val_244 11 -244 val_244 12 -247 val_247 11 -247 val_247 12 -248 val_248 11 -248 val_248 12 -249 val_249 11 -249 val_249 12 -252 val_252 11 -252 val_252 12 -255 val_255 11 -255 val_255 11 -255 val_255 12 -255 val_255 12 -256 val_256 11 -256 val_256 11 -256 val_256 12 -256 val_256 12 -257 val_257 11 -257 val_257 12 -258 val_258 11 -258 val_258 12 -26 val_26 11 -26 val_26 11 -26 val_26 12 -26 val_26 12 -260 val_260 11 -260 val_260 12 -262 val_262 11 -262 val_262 12 -263 val_263 11 -263 val_263 12 -265 val_265 11 -265 val_265 11 -265 val_265 12 -265 val_265 12 -266 val_266 11 -266 val_266 12 -27 val_27 11 -27 val_27 12 -272 val_272 11 -272 val_272 11 -272 val_272 12 -272 val_272 12 -273 val_273 11 -273 val_273 11 -273 val_273 11 -273 val_273 12 -273 val_273 12 -273 val_273 12 -274 val_274 11 -274 val_274 12 -275 val_275 11 -275 val_275 12 -277 val_277 11 -277 val_277 11 -277 val_277 11 -277 val_277 11 -277 val_277 12 -277 val_277 12 -277 val_277 12 -277 val_277 12 -278 val_278 11 -278 val_278 11 -278 val_278 12 -278 val_278 12 -28 val_28 11 -28 val_28 12 -280 val_280 11 -280 val_280 11 -280 val_280 12 -280 val_280 12 -281 val_281 11 -281 val_281 11 -281 val_281 12 -281 val_281 12 -282 val_282 11 -282 val_282 11 -282 val_282 12 -282 val_282 12 -283 val_283 11 -283 val_283 12 -284 val_284 11 -284 val_284 12 -285 val_285 11 -285 val_285 12 -286 val_286 11 -286 val_286 12 -287 val_287 11 -287 val_287 12 -288 val_288 11 -288 val_288 11 -288 val_288 12 -288 val_288 12 -289 val_289 11 -289 val_289 12 -291 val_291 11 -291 val_291 12 -292 val_292 11 -292 val_292 12 -296 val_296 11 -296 val_296 12 -298 val_298 11 -298 val_298 11 -298 val_298 11 -298 val_298 12 -298 val_298 12 -298 val_298 12 -30 val_30 11 -30 val_30 12 -302 val_302 11 -302 val_302 12 -305 val_305 11 -305 val_305 12 -306 val_306 11 -306 val_306 12 -307 val_307 11 -307 val_307 11 -307 val_307 12 -307 val_307 12 -308 val_308 11 -308 val_308 12 -309 val_309 11 -309 val_309 11 -309 val_309 12 -309 val_309 12 -310 val_310 11 -310 val_310 12 -311 val_311 11 -311 val_311 11 -311 val_311 11 -311 val_311 12 -311 val_311 12 -311 val_311 12 -315 val_315 11 -315 val_315 12 -316 val_316 11 -316 val_316 11 -316 val_316 11 -316 val_316 12 -316 val_316 12 -316 val_316 12 -317 val_317 11 -317 val_317 11 -317 val_317 12 -317 val_317 12 -318 val_318 11 -318 val_318 11 -318 val_318 11 -318 val_318 12 -318 val_318 12 -318 val_318 12 -321 val_321 11 -321 val_321 11 -321 val_321 12 -321 val_321 12 -322 val_322 11 -322 val_322 11 -322 val_322 12 -322 val_322 12 -323 val_323 11 -323 val_323 12 -325 val_325 11 -325 val_325 11 -325 val_325 12 -325 val_325 12 -327 val_327 11 -327 val_327 11 -327 val_327 11 -327 val_327 12 -327 val_327 12 -327 val_327 12 -33 val_33 11 -33 val_33 12 -331 val_331 11 -331 val_331 11 -331 val_331 12 -331 val_331 12 -332 val_332 11 -332 val_332 12 -333 val_333 11 -333 val_333 11 -333 val_333 12 -333 val_333 12 -335 val_335 11 -335 val_335 12 -336 val_336 11 -336 val_336 12 -338 val_338 11 -338 val_338 12 -339 val_339 11 -339 val_339 12 
-34 val_34 11 -34 val_34 12 -341 val_341 11 -341 val_341 12 -342 val_342 11 -342 val_342 11 -342 val_342 12 -342 val_342 12 -344 val_344 11 -344 val_344 11 -344 val_344 12 -344 val_344 12 -345 val_345 11 -345 val_345 12 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 11 -348 val_348 12 -348 val_348 12 -348 val_348 12 -348 val_348 12 -348 val_348 12 -35 val_35 11 -35 val_35 11 -35 val_35 11 -35 val_35 12 -35 val_35 12 -35 val_35 12 -351 val_351 11 -351 val_351 12 -353 val_353 11 -353 val_353 11 -353 val_353 12 -353 val_353 12 -356 val_356 11 -356 val_356 12 -360 val_360 11 -360 val_360 12 -362 val_362 11 -362 val_362 12 -364 val_364 11 -364 val_364 12 -365 val_365 11 -365 val_365 12 -366 val_366 11 -366 val_366 12 -367 val_367 11 -367 val_367 11 -367 val_367 12 -367 val_367 12 -368 val_368 11 -368 val_368 12 -369 val_369 11 -369 val_369 11 -369 val_369 11 -369 val_369 12 -369 val_369 12 -369 val_369 12 -37 val_37 11 -37 val_37 11 -37 val_37 12 -37 val_37 12 -373 val_373 11 -373 val_373 12 -374 val_374 11 -374 val_374 12 -375 val_375 11 -375 val_375 12 -377 val_377 11 -377 val_377 12 -378 val_378 11 -378 val_378 12 -379 val_379 11 -379 val_379 12 -382 val_382 11 -382 val_382 11 -382 val_382 12 -382 val_382 12 -384 val_384 11 -384 val_384 11 -384 val_384 11 -384 val_384 12 -384 val_384 12 -384 val_384 12 -386 val_386 11 -386 val_386 12 -389 val_389 11 -389 val_389 12 -392 val_392 11 -392 val_392 12 -393 val_393 11 -393 val_393 12 -394 val_394 11 -394 val_394 12 -395 val_395 11 -395 val_395 11 -395 val_395 12 -395 val_395 12 -396 val_396 11 -396 val_396 11 -396 val_396 11 -396 val_396 12 -396 val_396 12 -396 val_396 12 -397 val_397 11 -397 val_397 11 -397 val_397 12 -397 val_397 12 -399 val_399 11 -399 val_399 11 -399 val_399 12 -399 val_399 12 -4 val_4 11 -4 val_4 12 -400 val_400 11 -400 val_400 12 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 11 -401 val_401 12 -401 val_401 12 -401 val_401 12 -401 val_401 12 -401 val_401 12 -402 val_402 11 -402 val_402 12 -403 val_403 11 -403 val_403 11 -403 val_403 11 -403 val_403 12 -403 val_403 12 -403 val_403 12 -404 val_404 11 -404 val_404 11 -404 val_404 12 -404 val_404 12 -406 val_406 11 -406 val_406 11 -406 val_406 11 -406 val_406 11 -406 val_406 12 -406 val_406 12 -406 val_406 12 -406 val_406 12 -407 val_407 11 -407 val_407 12 -409 val_409 11 -409 val_409 11 -409 val_409 11 -409 val_409 12 -409 val_409 12 -409 val_409 12 -41 val_41 11 -41 val_41 12 -411 val_411 11 -411 val_411 12 -413 val_413 11 -413 val_413 11 -413 val_413 12 -413 val_413 12 -414 val_414 11 -414 val_414 11 -414 val_414 12 -414 val_414 12 -417 val_417 11 -417 val_417 11 -417 val_417 11 -417 val_417 12 -417 val_417 12 -417 val_417 12 -418 val_418 11 -418 val_418 12 -419 val_419 11 -419 val_419 12 -42 val_42 11 -42 val_42 11 -42 val_42 12 -42 val_42 12 -421 val_421 11 -421 val_421 12 -424 val_424 11 -424 val_424 11 -424 val_424 12 -424 val_424 12 -427 val_427 11 -427 val_427 12 -429 val_429 11 -429 val_429 11 -429 val_429 12 -429 val_429 12 -43 val_43 11 -43 val_43 12 -430 val_430 11 -430 val_430 11 -430 val_430 11 -430 val_430 12 -430 val_430 12 -430 val_430 12 -431 val_431 11 -431 val_431 11 -431 val_431 11 -431 val_431 12 -431 val_431 12 -431 val_431 12 -432 val_432 11 -432 val_432 12 -435 val_435 11 -435 val_435 12 -436 val_436 11 -436 val_436 12 -437 val_437 11 -437 val_437 12 -438 val_438 11 -438 val_438 11 -438 val_438 11 -438 val_438 12 -438 val_438 12 -438 val_438 12 -439 val_439 11 -439 val_439 11 -439 val_439 12 
-439 val_439 12 -44 val_44 11 -44 val_44 12 -443 val_443 11 -443 val_443 12 -444 val_444 11 -444 val_444 12 -446 val_446 11 -446 val_446 12 -448 val_448 11 -448 val_448 12 -449 val_449 11 -449 val_449 12 -452 val_452 11 -452 val_452 12 -453 val_453 11 -453 val_453 12 -454 val_454 11 -454 val_454 11 -454 val_454 11 -454 val_454 12 -454 val_454 12 -454 val_454 12 -455 val_455 11 -455 val_455 12 -457 val_457 11 -457 val_457 12 -458 val_458 11 -458 val_458 11 -458 val_458 12 -458 val_458 12 -459 val_459 11 -459 val_459 11 -459 val_459 12 -459 val_459 12 -460 val_460 11 -460 val_460 12 -462 val_462 11 -462 val_462 11 -462 val_462 12 -462 val_462 12 -463 val_463 11 -463 val_463 11 -463 val_463 12 -463 val_463 12 -466 val_466 11 -466 val_466 11 -466 val_466 11 -466 val_466 12 -466 val_466 12 -466 val_466 12 -467 val_467 11 -467 val_467 12 -468 val_468 11 -468 val_468 11 -468 val_468 11 -468 val_468 11 -468 val_468 12 -468 val_468 12 -468 val_468 12 -468 val_468 12 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 11 -469 val_469 12 -469 val_469 12 -469 val_469 12 -469 val_469 12 -469 val_469 12 -47 val_47 11 -47 val_47 12 -470 val_470 11 -470 val_470 12 -472 val_472 11 -472 val_472 12 -475 val_475 11 -475 val_475 12 -477 val_477 11 -477 val_477 12 -478 val_478 11 -478 val_478 11 -478 val_478 12 -478 val_478 12 -479 val_479 11 -479 val_479 12 -480 val_480 11 -480 val_480 11 -480 val_480 11 -480 val_480 12 -480 val_480 12 -480 val_480 12 -481 val_481 11 -481 val_481 12 -482 val_482 11 -482 val_482 12 -483 val_483 11 -483 val_483 12 -484 val_484 11 -484 val_484 12 -485 val_485 11 -485 val_485 12 -487 val_487 11 -487 val_487 12 -489 val_489 11 -489 val_489 11 -489 val_489 11 -489 val_489 11 -489 val_489 12 -489 val_489 12 -489 val_489 12 -489 val_489 12 -490 val_490 11 -490 val_490 12 -491 val_491 11 -491 val_491 12 -492 val_492 11 -492 val_492 11 -492 val_492 12 -492 val_492 12 -493 val_493 11 -493 val_493 12 -494 val_494 11 -494 val_494 12 -495 val_495 11 -495 val_495 12 -496 val_496 11 -496 val_496 12 -497 val_497 11 -497 val_497 12 -498 val_498 11 -498 val_498 11 -498 val_498 11 -498 val_498 12 -498 val_498 12 -498 val_498 12 -5 val_5 11 -5 val_5 11 -5 val_5 11 -5 val_5 12 -5 val_5 12 -5 val_5 12 -51 val_51 11 -51 val_51 11 -51 val_51 12 -51 val_51 12 -53 val_53 11 -53 val_53 12 -54 val_54 11 -54 val_54 12 -57 val_57 11 -57 val_57 12 -58 val_58 11 -58 val_58 11 -58 val_58 12 -58 val_58 12 -64 val_64 11 -64 val_64 12 -65 val_65 11 -65 val_65 12 -66 val_66 11 -66 val_66 12 -67 val_67 11 -67 val_67 11 -67 val_67 12 -67 val_67 12 -69 val_69 11 -69 val_69 12 -70 val_70 11 -70 val_70 11 -70 val_70 11 -70 val_70 12 -70 val_70 12 -70 val_70 12 -72 val_72 11 -72 val_72 11 -72 val_72 12 -72 val_72 12 -74 val_74 11 -74 val_74 12 -76 val_76 11 -76 val_76 11 -76 val_76 12 -76 val_76 12 -77 val_77 11 -77 val_77 12 -78 val_78 11 -78 val_78 12 -8 val_8 11 -8 val_8 12 -80 val_80 11 -80 val_80 12 -82 val_82 11 -82 val_82 12 -83 val_83 11 -83 val_83 11 -83 val_83 12 -83 val_83 12 -84 val_84 11 -84 val_84 11 -84 val_84 12 -84 val_84 12 -85 val_85 11 -85 val_85 12 -86 val_86 11 -86 val_86 12 -87 val_87 11 -87 val_87 12 -9 val_9 11 -9 val_9 12 -90 val_90 11 -90 val_90 11 -90 val_90 11 -90 val_90 12 -90 val_90 12 -90 val_90 12 -92 val_92 11 -92 val_92 12 -95 val_95 11 -95 val_95 11 -95 val_95 12 -95 val_95 12 -96 val_96 11 -96 val_96 12 -97 val_97 11 -97 val_97 11 -97 val_97 12 -97 val_97 12 -98 val_98 11 -98 val_98 11 -98 val_98 12 -98 val_98 12 -irG2+wNa4ZWkUKb0+hXweg== 
+1B2M2Y8AsgTpgAmY7PhCfg==
 PREHOOK: query: drop table partint
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@partint
Index: ql/src/test/results/clientpositive/allcolref_in_udf.q.out
===================================================================
--- ql/src/test/results/clientpositive/allcolref_in_udf.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/allcolref_in_udf.q.out	(working copy)
@@ -20,7 +20,7 @@
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: concat(key, value) (type: string), array(key,value) (type: array<string>)
-              outputColumnNames: _col0, _col1
+              outputColumnNames: _o__c0, _o__c1
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Limit
                 Number of rows: 10
@@ -35,7 +35,7 @@
   Stage: Stage-0
     Fetch Operator
-      limit: 10
+      limit: -1
       Processor Tree:
         ListSink
@@ -47,16 +47,6 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-86val_86	["86","val_86"]
-27val_27	["27","val_27"]
-98val_98	["98","val_98"]
-66val_66	["66","val_66"]
-37val_37	["37","val_37"]
-15val_15	["15","val_15"]
-82val_82	["82","val_82"]
-17val_17	["17","val_17"]
-0val_0	["0","val_0"]
-57val_57	["57","val_57"]
 PREHOOK: query: -- The order of columns is decided by row schema of prev operator
 -- Like join which has two or more aliases, it's from left most aias to right aliases.
@@ -115,7 +105,11 @@
               outputColumnNames: _col0, _col1, _col5, _col6
               Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: 2 (type: int), concat(_col0, _col1, _col5, _col6) (type: string), concat(_col0, _col1) (type: string), concat(_col5, _col6) (type: string), concat(_col0, _col1, _col5) (type: string), concat(_col0, _col5, _col6) (type: string)
+                expressions: concat(_col0, _col1, _col5, _col6) (type: string), concat(_col0, _col1) (type: string), concat(_col5, _col6) (type: string), concat(_col0, _col1, _col5) (type: string), concat(_col0, _col5, _col6) (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: 2 (type: int), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   UDTF Operator
@@ -186,20 +180,20 @@
     Map Operator Tree:
         TableScan
           alias: allcolref
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
           Select Operator
-            expressions: c0 (type: array<string>)
+            expressions: _o__c0 (type: array<string>)
             outputColumnNames: _col0
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             UDTF Operator
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               function name: explode
               Limit
                 Number of rows: 10
-                Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -219,13 +213,3 @@
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@allcolref
 #### A masked pattern was here ####
-238
-val_238
-86
-val_86
-311
-val_311
-27
-val_27
-165
-val_165
Index: ql/src/test/results/clientpositive/constprog_dp.q.out
===================================================================
--- ql/src/test/results/clientpositive/constprog_dp.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/constprog_dp.q.out	(working copy)
@@ -30,14 +30,17 @@
     Map Operator Tree:
         TableScan
           alias: srcpart
-          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (ds = '2008-04-08') (type: boolean)
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: key (type: string), value (type: string), '2008-04-08' (type: string)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            expressions: key (type: string), value (type: string), ds (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -104,6 +107,8 @@
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Output: default@dest
 POSTHOOK: query: from srcpart
 insert overwrite table dest partition (ds) select key, value, ds where ds='2008-04-08'
@@ -111,6 +116,5 @@
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: default@dest@ds=2008-04-08
-POSTHOOK: Lineage: dest PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
Index: ql/src/test/results/clientpositive/groupby_resolution.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_resolution.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby_resolution.q.out	(working copy)
@@ -690,7 +690,7 @@
                   raw input shape:
                   window functions:
                       window function definition
-                        alias: _wcol0
+                        alias: rank_window_0
                         arguments: _col1
                         name: rank
                         window function: GenericUDAFRankEvaluator
@@ -698,7 +698,7 @@
                         isPivotResult: true
             Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col0 (type: string), _col1 (type: bigint), _wcol0 (type: int)
+              expressions: _col0 (type: string), _col1 (type: bigint), rank_window_0 (type: int)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
Index: ql/src/test/results/clientpositive/groupby2_noskew.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby2_noskew.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/groupby2_noskew.q.out	(working copy)
@@ -32,22 +32,26 @@
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
-            outputColumnNames: _col0, _col1
+            outputColumnNames: $f0, $f1
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Reduce Output Operator
-              key expressions: _col0 (type: string), _col1 (type: string)
+              key expressions: $f0 (type: string), $f1 (type: string)
               sort order: ++
-              Map-reduce partition columns: _col0 (type: string)
+              Map-reduce partition columns: $f0 (type: string)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
           mode: complete
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: $f0, $f1, $f2
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+            expressions: $f0 (type: string), $f1 (type: bigint), concat($f0, $f2) (type: string)
+            outputColumnNames: _o__c0, _o__c1, _o__c2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _o__c0 (type: string), UDFToInteger(_o__c1) (type: int), _o__c2 (type: string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
Index: ql/src/test/results/clientpositive/escape_orderby1.q.out
===================================================================
--- ql/src/test/results/clientpositive/escape_orderby1.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/escape_orderby1.q.out	(working copy)
@@ -17,18 +17,14 @@
         TableScan
           alias: src
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: key (type: string), value (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
-            key expressions: _col0 (type: string), _col1 (type: string)
+            key expressions: key (type: string), value (type: string)
             sort order: ++
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
+          outputColumnNames: key, value
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -61,18 +57,14 @@
         TableScan
           alias: src
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: key (type: string), value (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
-            key expressions: _col0 (type: string), _col1 (type: string)
+            key expressions: key (type: string), value (type: string)
             sort order: ++
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
-          outputColumnNames: _col0, _col1
+          outputColumnNames: key, value
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
Index: ql/src/test/results/clientpositive/diff_part_input_formats.q.out
===================================================================
--- ql/src/test/results/clientpositive/diff_part_input_formats.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/diff_part_input_formats.q.out	(working copy)
@@ -37,9 +37,13 @@
 PREHOOK: query: SELECT count(1) FROM part_test WHERE ds='3'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part_test
+PREHOOK: Input: default@part_test@ds=1
+PREHOOK: Input: default@part_test@ds=2
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT count(1) FROM part_test WHERE ds='3'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part_test
+POSTHOOK: Input: default@part_test@ds=1
+POSTHOOK: Input: default@part_test@ds=2
 #### A masked pattern was here ####
 0
Index: ql/src/test/results/clientpositive/join32_lessSize.q.out
===================================================================
--- ql/src/test/results/clientpositive/join32_lessSize.q.out	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/test/results/clientpositive/join32_lessSize.q.out	(working copy)
@@ -130,7 +130,7 @@
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (value is not null and key is not null) (type: boolean)
+              predicate: (key is not null and value is not null) (type: boolean)
               Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
@@ -139,31 +139,31 @@
                 HashTable Sink Operator
                   keys:
                     0 _col0 (type: string)
-                    1 _col1 (type: string)
+                    1 _col0 (type: string)
                   Position of Big Table: 0
   Stage: Stage-6
     Map Reduce
      Map Operator Tree:
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
-              predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+              predicate: key is not null (type: boolean)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
-                expressions: value (type: string)
-                outputColumnNames: _col0
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
                  keys:
                    0 _col0 (type: string)
-                    1 _col1 (type: string)
-                  outputColumnNames: _col0, _col3
+                    1 _col0 (type: string)
+                  outputColumnNames: _col1, _col2, _col3
                  Position of Big Table: 0
                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
@@ -175,8 +175,8 @@
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 - columns.types string,string + columns _col1,_col2,_col3 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -190,6 +190,50 @@ Path -> Partition: #### A masked pattern was here #### Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -232,7 +276,16 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src1 name: default.src1 -#### A masked pattern was here #### + Truncated Path -> Alias: + /src [$hdt$_1:$hdt$_1:y] + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:z + Fetch Operator + limit: -1 + Partition Description: Partition base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat @@ -278,28 +331,19 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:y - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:y + $hdt$_0:z TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -318,11 +362,11 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - 
outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -362,8 +406,8 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 - columns.types string,string + columns _col1,_col2,_col3 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -371,16 +415,19 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 - columns.types string,string + columns _col1,_col2,_col3 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -388,11 +435,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -402,24 +451,21 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: #### A masked pattern was here #### @@ -470,8 +516,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION 
[(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -657,27 +703,28 @@ Stage-8 depends on stages: Stage-11 Stage-10 depends on stages: Stage-8 Stage-7 depends on stages: Stage-10 - Stage-9 depends on stages: Stage-7 - Stage-6 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 + $hdt$_1:$hdt$_3:x + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -686,12 +733,9 @@ HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 1 - - Stage: Stage-8 - Map Reduce - Map Operator Tree: + 1 _col1 (type: string) + Position of Big Table: 0 + $hdt$_1:$hdt$_3:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -704,92 +748,15 @@ expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col3 - columns.types string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Position of Big Table: 0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 - Truncated Path -> Alias: - /src1 [$hdt$_1:$hdt$_2:$hdt$_3:x] - - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:w - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:w + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: w Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -802,26 +769,24 @@ expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col1 (type: string) - Position of Big Table: 1 - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false + outputColumnNames: _col1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1, _col4 - Position of Big Table: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Position of Big Table: 0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -846,26 +811,6 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col3 - columns.types string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col3 - columns.types string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -908,10 +853,54 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src name: default.src - Truncated Path -> Alias: #### A masked pattern was here #### + 
Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-9 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src [$hdt$_1:$hdt$_1:w] + + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:w @@ -937,7 +926,7 @@ 1 _col1 (type: string) Position of Big Table: 1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -950,17 +939,17 @@ 1 _col1 (type: string) outputColumnNames: _col1, _col3, _col6 Position of Big Table: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -993,7 +982,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10001 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: Index: ql/src/test/results/clientpositive/groupby2_limit.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_limit.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby2_limit.q.out (working copy) @@ -18,11 +18,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) + aggregations: sum($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -37,7 +37,7 @@ aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -51,14 +51,14 @@ Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: $f1 (type: double) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: string), VALUE.$f1 (type: double) + outputColumnNames: $f0, $f1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 @@ -73,7 +73,7 @@ Stage: Stage-0 Fetch Operator - limit: 5 + limit: -1 Processor Tree: ListSink @@ -85,8 +85,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 Index: ql/src/test/results/clientpositive/cross_product_check_1.q.out =================================================================== --- ql/src/test/results/clientpositive/cross_product_check_1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/cross_product_check_1.q.out (working copy) @@ -24,7 +24,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@B -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[6][tables = [key, value, key, value]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from A join B PREHOOK: type: QUERY POSTHOOK: query: explain select * from A join B @@ -39,26 +39,18 @@ Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value 
expressions: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -66,11 +58,11 @@ keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -98,27 +90,27 @@ Map Operator Tree: TableScan alias: d1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan alias: d2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) Reduce Operator Tree: Join Operator @@ -128,7 +120,7 @@ 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -142,14 +134,14 @@ TableScan Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator @@ -159,14 +151,14 @@ 0 1 outputColumnNames: _col0, _col1, _col5, _col6, 
_col10, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -201,26 +193,26 @@ Map Operator Tree: TableScan alias: d1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: d2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -229,12 +221,12 @@ 0 key (type: string) 1 key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -250,13 +242,13 @@ key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -270,14 +262,14 @@ TableScan Reduce Output Operator sort order: - Statistics: Num 
rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string) TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator @@ -287,14 +279,14 @@ 0 1 outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -324,17 +316,17 @@ Map Operator Tree: TableScan alias: d1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string) TableScan alias: d2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -343,12 +335,12 @@ 0 1 outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -364,13 +356,13 @@ key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -384,14 +376,14 @@ TableScan Reduce Output Operator 
sort order: - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string) TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator @@ -401,14 +393,14 @@ 0 1 outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -442,27 +434,27 @@ Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator keys: key (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -476,12 +468,12 @@ TableScan Reduce Output Operator sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string) TableScan Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator @@ -491,10 +483,10 @@ 0 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 
Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -505,26 +497,26 @@ Map Operator Tree: TableScan alias: d1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: d2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -533,12 +525,12 @@ 0 key (type: string) 1 key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -554,13 +546,13 @@ key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: Index: ql/src/test/results/clientpositive/cluster.q.out =================================================================== --- ql/src/test/results/clientpositive/cluster.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/cluster.q.out (working copy) @@ -435,7 +435,11 @@ value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), VALUE._col0 (type: string) + expressions: VALUE._col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE + Select Operator + expressions: '20' (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/union_remove_6_subq.q.out =================================================================== --- ql/src/test/results/clientpositive/union_remove_6_subq.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/union_remove_6_subq.q.out (working copy) @@ -559,14 +559,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: avg_window_0 arguments: _col1 name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _wcol0 (type: double) + expressions: _col0 (type: string), avg_window_0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/groupby3_map_skew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby3_map_skew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby3_map_skew.q.out (working copy) @@ -47,11 +47,11 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) - keys: _col0 (type: string) + aggregations: sum($f0), avg($f0), avg(DISTINCT $f0), max($f0), min($f0), std($f0), stddev_samp($f0), variance($f0), var_samp($f0) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -86,15 +86,15 @@ Group By Operator aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -144,7 +144,7 @@ POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -161,4 +161,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10356 98.0 0.0 142.92681 143.06995 20428.07288 20469.0109 Index: ql/src/test/results/clientpositive/windowing_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/windowing_streaming.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/windowing_streaming.q.out (working copy) @@ -89,7 +89,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -97,7 +97,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _wcol0 (type: int) + expressions: _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -160,7 +160,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -168,10 +168,10 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 < 4) (type: boolean) + predicate: (rank_window_0 < 4) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _wcol0 (type: int) + expressions: _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -329,7 +329,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -337,10 +337,10 @@ isPivotResult: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 < 5) (type: boolean) + predicate: (rank_window_0 < 5) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: 
COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: tinyint), _col5 (type: double), _wcol0 (type: int) + expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/cast1.q.out =================================================================== --- ql/src/test/results/clientpositive/cast1.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/cast1.q.out (working copy) @@ -34,7 +34,7 @@ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int), 5.0 (type: double), 5.0 (type: double), 5.0 (type: double), 5 (type: int), true (type: boolean), 1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _o__c0, _o__c1, _o__c2, _o__c3, _o__c4, _o__c5, _o__c6 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -120,4 +120,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -5 5.0 5.0 5.0 5 true 1 Index: ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out (working copy) @@ -53,34 +53,6 @@ SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes POSTHOOK: type: QUERY POSTHOOK: Input: default@episodes -POSTHOOK: Output: default@episodes_partitioned@doctor_pt=1 -POSTHOOK: Output: default@episodes_partitioned@doctor_pt=11 -POSTHOOK: Output: default@episodes_partitioned@doctor_pt=2 -POSTHOOK: Output: default@episodes_partitioned@doctor_pt=4 -POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 -POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 -POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE 
[(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] PREHOOK: query: ALTER TABLE episodes_partitioned SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' WITH @@ -152,60 +124,31 @@ PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 PREHOOK: type: QUERY PREHOOK: Input: default@episodes_partitioned -PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 -PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 POSTHOOK: type: QUERY POSTHOOK: Input: default@episodes_partitioned -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 #### A masked pattern was here #### -Rose 26 March 2005 9 0 9 -The Doctor's Wife 14 May 2011 11 0 11 -The Eleventh Hour 3 April 2010 11 0 11 PREHOOK: query: -- Verify that Fetch works in addition to Map SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@episodes_partitioned -PREHOOK: Input: default@episodes_partitioned@doctor_pt=1 -PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 -PREHOOK: Input: default@episodes_partitioned@doctor_pt=2 -PREHOOK: Input: 
default@episodes_partitioned@doctor_pt=4 -PREHOOK: Input: default@episodes_partitioned@doctor_pt=5 -PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 -PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 #### A masked pattern was here #### POSTHOOK: query: -- Verify that Fetch works in addition to Map SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@episodes_partitioned -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1 -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2 -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4 -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 #### A masked pattern was here #### -An Unearthly Child 23 November 1963 1 0 1 -Horror of Fang Rock 3 September 1977 4 0 4 -Rose 26 March 2005 9 0 9 -The Doctor's Wife 14 May 2011 11 0 11 -The Eleventh Hour 3 April 2010 11 0 11 PREHOOK: query: -- Fetch w/filter to specific partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 PREHOOK: type: QUERY PREHOOK: Input: default@episodes_partitioned -PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 #### A masked pattern was here #### POSTHOOK: query: -- Fetch w/filter to specific partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 POSTHOOK: type: QUERY POSTHOOK: Input: default@episodes_partitioned -POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 #### A masked pattern was here #### -The Mysterious Planet 6 September 1986 6 0 6 PREHOOK: query: -- Fetch w/non-existent partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/alter_merge_3.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_merge_3.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/alter_merge_3.q.out (working copy) @@ -33,7 +33,7 @@ POSTHOOK: Input: default@src_rc_merge_test_part POSTHOOK: Input: default@src_rc_merge_test_part@ds=2014-08-04 #### A masked pattern was here #### -15 +0 #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS #### A masked pattern was here #### @@ -54,14 +54,16 @@ PREHOOK: query: select count(1) from src_rc_merge_test_part where ds='2014-08-05' PREHOOK: type: QUERY PREHOOK: Input: default@src_rc_merge_test_part +PREHOOK: Input: default@src_rc_merge_test_part@ds=2014-08-04 PREHOOK: Input: default@src_rc_merge_test_part@ds=2014-08-05 #### A masked pattern was here #### POSTHOOK: query: select count(1) from src_rc_merge_test_part where ds='2014-08-05' POSTHOOK: type: QUERY POSTHOOK: Input: default@src_rc_merge_test_part +POSTHOOK: Input: default@src_rc_merge_test_part@ds=2014-08-04 POSTHOOK: Input: default@src_rc_merge_test_part@ds=2014-08-05 #### A masked pattern was here #### -15 +0 PREHOOK: query: drop table src_rc_merge_test_part PREHOOK: type: DROPTABLE PREHOOK: Input: default@src_rc_merge_test_part Index: ql/src/test/results/clientpositive/groupby_sort_6.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_6.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_sort_6.q.out (working copy) @@ -72,10 +72,10 @@ 
aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + expressions: UDFToInteger($f0) (type: int), UDFToInteger($f1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -208,16 +208,93 @@ STAGE PLANS: Stage: Stage-1 Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds = '1') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count($f1) + keys: $f0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,val + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1 + numFiles 1 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 30 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,val + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + name: default.t1 + Truncated Path -> Alias: + /t1/ds=2 [t1] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + expressions: UDFToInteger($f0) (type: int), UDFToInteger($f1) (type: int) outputColumnNames: _col0, _col1 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -287,11 +364,13 @@ SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=2 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=2 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] @@ -358,13 +437,17 @@ alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + expressions: key (type: string), 1 (type: int) + outputColumnNames: $f0, $f1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count($f1) + keys: $f0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -425,17 +508,17 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1/ds=2 [$hdt$_0:t1] + /t1/ds=2 [t1] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: $f0, $f1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + expressions: UDFToInteger($f0) (type: int), UDFToInteger($f1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -523,8 +606,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -1 1 -2 1 -3 1 -7 1 -8 2 Index: ql/src/test/results/clientpositive/groupby_grouping_window.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out (working copy) @@ -42,22 +42,22 @@ Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: category (type: int), live (type: int), comments (type: int) outputColumnNames: category, live, comments - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator aggregations: max(live), max(comments) keys: category (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE 
Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: int) Reduce Operator Tree: Group By Operator @@ -65,11 +65,11 @@ keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE pruneGroupingSetId: true Filter Operator predicate: (_col3 > 0) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -85,13 +85,13 @@ key expressions: _col0 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: int) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE PTF Operator Function definitions: Input definition @@ -106,20 +106,24 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col3 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _wcol0 (type: int) + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col2, _col3, rank_window_0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -147,7 +151,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t #### A masked pattern was here #### -NULL 0 2 1 -86 0 2 1 -238 0 2 1 -311 0 2 1 Index: ql/src/test/results/clientpositive/create_or_replace_view.q.out =================================================================== --- ql/src/test/results/clientpositive/create_or_replace_view.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ 
ql/src/test/results/clientpositive/create_or_replace_view.q.out (working copy) @@ -64,25 +64,37 @@ PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: vt@v PREHOOK: Output: vt@v POSTHOOK: query: alter view vt.v add partition (ds='2008-04-08',hr='11') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: vt@v POSTHOOK: Output: vt@v POSTHOOK: Output: vt@v@ds=2008-04-08/hr=11 PREHOOK: query: alter view vt.v add partition (ds='2008-04-08',hr='12') PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: vt@v PREHOOK: Output: vt@v POSTHOOK: query: alter view vt.v add partition (ds='2008-04-08',hr='12') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: vt@v POSTHOOK: Output: vt@v POSTHOOK: Output: vt@v@ds=2008-04-08/hr=12 @@ -90,17 +102,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: vt@v #### A masked pattern was here #### POSTHOOK: query: select * from vt.v where value='val_409' and ds='2008-04-08' and hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: vt@v #### A masked pattern was here #### -409 val_409 2008-04-08 11 -409 val_409 2008-04-08 11 -409 val_409 2008-04-08 11 PREHOOK: query: describe formatted vt.v PREHOOK: type: DESCTABLE PREHOOK: Input: vt@v @@ -185,17 +200,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: vt@v #### A masked pattern was here #### POSTHOOK: query: select * from vt.v where value='val_409' and ds='2008-04-08' and hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: vt@v #### A masked pattern was here #### -val_409 2008-04-08 11 -val_409 2008-04-08 11 -val_409 2008-04-08 11 PREHOOK: query: describe formatted vt.v PREHOOK: type: DESCTABLE PREHOOK: Input: vt@v @@ 
-255,17 +273,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: vt@v #### A masked pattern was here #### POSTHOOK: query: select * from vt.v where value='val_409' and ds='2008-04-08' and hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: vt@v #### A masked pattern was here #### -409 val_409 2008-04-08 11 -409 val_409 2008-04-08 11 -409 val_409 2008-04-08 11 PREHOOK: query: describe formatted vt.v PREHOOK: type: DESCTABLE PREHOOK: Input: vt@v Index: ql/src/test/results/clientpositive/avro_decimal_native.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_decimal_native.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/avro_decimal_native.q.out (working copy) @@ -38,7 +38,7 @@ POSTHOOK: Input: default@dec # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value decimal(8,4) -12.25 234.79 0 6 from deserializer +value decimal(8,4) 0 0 from deserializer PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec @@ -85,16 +85,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_dec #### A masked pattern was here #### -Tom 234.79 -Beck 77.34 -Snow 55.71 -Mary 4.33 -Cluck 5.96 -Tom -12.25 -Mary 33.33 -Tom 19 -Beck 0 -Beck 79.9 PREHOOK: query: DROP TABLE IF EXISTS avro_dec1 PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec1 @@ -139,16 +129,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_dec1 #### A masked pattern was here #### -234.8 -77.3 -55.7 -4.3 -6 -12.3 -33.3 -19 -3.2 -79.9 +Tom 234.8 +Beck 77.3 +Snow 55.7 +Mary 4.3 +Cluck 6 +Tom 12.3 +Mary 33.3 +Tom 19 +Beck 3.2 +Beck 79.9 PREHOOK: query: DROP TABLE dec PREHOOK: type: DROPTABLE PREHOOK: Input: default@dec Index: ql/src/test/results/clientpositive/groupby9.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby9.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby9.q.out (working copy) @@ -176,315 +176,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 
-260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 PREHOOK: query: SELECT DEST2.* FROM DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -493,315 +184,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -0 val_0 1 -10 val_10 1 -100 val_100 1 -103 val_103 1 -104 val_104 1 -105 val_105 1 -11 val_11 1 -111 val_111 1 -113 val_113 1 -114 val_114 1 -116 val_116 1 -118 val_118 1 -119 val_119 1 -12 val_12 1 -120 val_120 1 -125 val_125 1 -126 val_126 1 -128 val_128 1 -129 val_129 1 -131 val_131 1 -133 val_133 1 -134 val_134 1 -136 val_136 1 -137 val_137 1 -138 val_138 1 -143 val_143 1 -145 val_145 1 -146 val_146 1 -149 val_149 1 -15 val_15 1 -150 val_150 1 -152 val_152 1 -153 val_153 1 -155 val_155 1 -156 val_156 1 -157 val_157 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -163 val_163 1 -164 val_164 1 -165 val_165 1 -166 val_166 1 -167 val_167 1 -168 val_168 1 -169 val_169 1 -17 val_17 1 -170 val_170 1 -172 val_172 1 -174 val_174 1 -175 val_175 1 -176 val_176 1 -177 val_177 1 -178 val_178 1 -179 val_179 1 -18 val_18 1 -180 val_180 1 -181 val_181 1 -183 val_183 1 -186 val_186 1 -187 val_187 1 -189 val_189 1 -19 val_19 1 -190 val_190 1 -191 val_191 1 -192 val_192 1 -193 val_193 1 -194 val_194 1 -195 val_195 1 -196 val_196 1 -197 val_197 1 -199 val_199 1 -2 val_2 1 -20 val_20 1 -200 val_200 1 -201 val_201 1 -202 val_202 1 -203 val_203 1 -205 val_205 1 -207 val_207 1 -208 val_208 1 -209 val_209 1 -213 val_213 1 -214 val_214 1 -216 val_216 1 -217 val_217 1 -218 val_218 1 -219 val_219 1 -221 val_221 1 -222 val_222 1 -223 val_223 1 -224 val_224 1 -226 val_226 1 -228 val_228 1 -229 val_229 1 -230 val_230 1 -233 val_233 1 -235 val_235 1 -237 val_237 1 -238 val_238 1 -239 val_239 1 -24 val_24 1 -241 val_241 1 -242 val_242 1 -244 val_244 1 -247 val_247 1 -248 val_248 1 -249 val_249 1 -252 val_252 1 -255 val_255 1 -256 val_256 1 -257 val_257 1 -258 val_258 1 -26 val_26 1 -260 val_260 1 -262 val_262 1 -263 val_263 1 -265 val_265 1 -266 val_266 1 -27 val_27 1 -272 val_272 1 -273 val_273 1 -274 val_274 1 -275 val_275 1 -277 val_277 1 -278 val_278 1 -28 val_28 1 -280 val_280 1 -281 val_281 1 -282 val_282 1 -283 val_283 1 -284 val_284 1 -285 val_285 1 -286 val_286 1 -287 val_287 1 -288 val_288 1 -289 val_289 1 -291 
val_291 1 -292 val_292 1 -296 val_296 1 -298 val_298 1 -30 val_30 1 -302 val_302 1 -305 val_305 1 -306 val_306 1 -307 val_307 1 -308 val_308 1 -309 val_309 1 -310 val_310 1 -311 val_311 1 -315 val_315 1 -316 val_316 1 -317 val_317 1 -318 val_318 1 -321 val_321 1 -322 val_322 1 -323 val_323 1 -325 val_325 1 -327 val_327 1 -33 val_33 1 -331 val_331 1 -332 val_332 1 -333 val_333 1 -335 val_335 1 -336 val_336 1 -338 val_338 1 -339 val_339 1 -34 val_34 1 -341 val_341 1 -342 val_342 1 -344 val_344 1 -345 val_345 1 -348 val_348 1 -35 val_35 1 -351 val_351 1 -353 val_353 1 -356 val_356 1 -360 val_360 1 -362 val_362 1 -364 val_364 1 -365 val_365 1 -366 val_366 1 -367 val_367 1 -368 val_368 1 -369 val_369 1 -37 val_37 1 -373 val_373 1 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -379 val_379 1 -382 val_382 1 -384 val_384 1 -386 val_386 1 -389 val_389 1 -392 val_392 1 -393 val_393 1 -394 val_394 1 -395 val_395 1 -396 val_396 1 -397 val_397 1 -399 val_399 1 -4 val_4 1 -400 val_400 1 -401 val_401 1 -402 val_402 1 -403 val_403 1 -404 val_404 1 -406 val_406 1 -407 val_407 1 -409 val_409 1 -41 val_41 1 -411 val_411 1 -413 val_413 1 -414 val_414 1 -417 val_417 1 -418 val_418 1 -419 val_419 1 -42 val_42 1 -421 val_421 1 -424 val_424 1 -427 val_427 1 -429 val_429 1 -43 val_43 1 -430 val_430 1 -431 val_431 1 -432 val_432 1 -435 val_435 1 -436 val_436 1 -437 val_437 1 -438 val_438 1 -439 val_439 1 -44 val_44 1 -443 val_443 1 -444 val_444 1 -446 val_446 1 -448 val_448 1 -449 val_449 1 -452 val_452 1 -453 val_453 1 -454 val_454 1 -455 val_455 1 -457 val_457 1 -458 val_458 1 -459 val_459 1 -460 val_460 1 -462 val_462 1 -463 val_463 1 -466 val_466 1 -467 val_467 1 -468 val_468 1 -469 val_469 1 -47 val_47 1 -470 val_470 1 -472 val_472 1 -475 val_475 1 -477 val_477 1 -478 val_478 1 -479 val_479 1 -480 val_480 1 -481 val_481 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -485 val_485 1 -487 val_487 1 -489 val_489 1 -490 val_490 1 -491 val_491 1 -492 val_492 1 -493 val_493 1 -494 val_494 1 -495 val_495 1 -496 val_496 1 -497 val_497 1 -498 val_498 1 -5 val_5 1 -51 val_51 1 -53 val_53 1 -54 val_54 1 -57 val_57 1 -58 val_58 1 -64 val_64 1 -65 val_65 1 -66 val_66 1 -67 val_67 1 -69 val_69 1 -70 val_70 1 -72 val_72 1 -74 val_74 1 -76 val_76 1 -77 val_77 1 -78 val_78 1 -8 val_8 1 -80 val_80 1 -82 val_82 1 -83 val_83 1 -84 val_84 1 -85 val_85 1 -86 val_86 1 -87 val_87 1 -9 val_9 1 -90 val_90 1 -92 val_92 1 -95 val_95 1 -96 val_96 1 -97 val_97 1 -98 val_98 1 PREHOOK: query: EXPLAIN FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -908,9 +290,13 @@ outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -960,315 +346,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 1 -10 1 -100 1 -103 
1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 PREHOOK: query: SELECT DEST2.* FROM DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -1277,315 +354,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -0 val_0 1 -10 val_10 1 -100 val_100 1 -103 val_103 1 -104 val_104 1 -105 val_105 1 -11 val_11 1 -111 val_111 1 -113 val_113 1 -114 val_114 1 -116 val_116 1 -118 val_118 1 -119 val_119 1 -12 val_12 1 -120 val_120 1 -125 val_125 1 -126 val_126 1 -128 val_128 1 -129 val_129 1 -131 val_131 1 -133 val_133 1 -134 val_134 1 -136 val_136 1 -137 val_137 1 -138 val_138 1 -143 val_143 1 -145 val_145 1 -146 val_146 1 -149 val_149 1 -15 val_15 1 -150 val_150 1 -152 val_152 1 -153 val_153 1 -155 val_155 1 -156 val_156 1 -157 val_157 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -163 val_163 1 -164 val_164 1 -165 val_165 1 -166 val_166 1 -167 val_167 1 -168 val_168 1 -169 val_169 1 -17 val_17 1 -170 val_170 1 -172 val_172 1 -174 val_174 1 -175 val_175 1 -176 val_176 1 -177 val_177 1 -178 val_178 1 -179 val_179 1 -18 val_18 1 -180 val_180 1 -181 val_181 1 -183 val_183 1 -186 val_186 1 -187 val_187 1 -189 val_189 1 -19 val_19 1 -190 val_190 1 -191 val_191 1 -192 val_192 1 -193 val_193 1 -194 val_194 1 -195 val_195 1 -196 val_196 1 -197 val_197 1 -199 val_199 1 -2 val_2 1 -20 val_20 1 -200 val_200 1 -201 val_201 1 -202 val_202 1 -203 val_203 1 -205 val_205 1 -207 val_207 1 -208 val_208 1 -209 val_209 1 -213 val_213 1 -214 val_214 1 -216 val_216 1 -217 val_217 1 -218 
val_218 1 -219 val_219 1 -221 val_221 1 -222 val_222 1 -223 val_223 1 -224 val_224 1 -226 val_226 1 -228 val_228 1 -229 val_229 1 -230 val_230 1 -233 val_233 1 -235 val_235 1 -237 val_237 1 -238 val_238 1 -239 val_239 1 -24 val_24 1 -241 val_241 1 -242 val_242 1 -244 val_244 1 -247 val_247 1 -248 val_248 1 -249 val_249 1 -252 val_252 1 -255 val_255 1 -256 val_256 1 -257 val_257 1 -258 val_258 1 -26 val_26 1 -260 val_260 1 -262 val_262 1 -263 val_263 1 -265 val_265 1 -266 val_266 1 -27 val_27 1 -272 val_272 1 -273 val_273 1 -274 val_274 1 -275 val_275 1 -277 val_277 1 -278 val_278 1 -28 val_28 1 -280 val_280 1 -281 val_281 1 -282 val_282 1 -283 val_283 1 -284 val_284 1 -285 val_285 1 -286 val_286 1 -287 val_287 1 -288 val_288 1 -289 val_289 1 -291 val_291 1 -292 val_292 1 -296 val_296 1 -298 val_298 1 -30 val_30 1 -302 val_302 1 -305 val_305 1 -306 val_306 1 -307 val_307 1 -308 val_308 1 -309 val_309 1 -310 val_310 1 -311 val_311 1 -315 val_315 1 -316 val_316 1 -317 val_317 1 -318 val_318 1 -321 val_321 1 -322 val_322 1 -323 val_323 1 -325 val_325 1 -327 val_327 1 -33 val_33 1 -331 val_331 1 -332 val_332 1 -333 val_333 1 -335 val_335 1 -336 val_336 1 -338 val_338 1 -339 val_339 1 -34 val_34 1 -341 val_341 1 -342 val_342 1 -344 val_344 1 -345 val_345 1 -348 val_348 1 -35 val_35 1 -351 val_351 1 -353 val_353 1 -356 val_356 1 -360 val_360 1 -362 val_362 1 -364 val_364 1 -365 val_365 1 -366 val_366 1 -367 val_367 1 -368 val_368 1 -369 val_369 1 -37 val_37 1 -373 val_373 1 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -379 val_379 1 -382 val_382 1 -384 val_384 1 -386 val_386 1 -389 val_389 1 -392 val_392 1 -393 val_393 1 -394 val_394 1 -395 val_395 1 -396 val_396 1 -397 val_397 1 -399 val_399 1 -4 val_4 1 -400 val_400 1 -401 val_401 1 -402 val_402 1 -403 val_403 1 -404 val_404 1 -406 val_406 1 -407 val_407 1 -409 val_409 1 -41 val_41 1 -411 val_411 1 -413 val_413 1 -414 val_414 1 -417 val_417 1 -418 val_418 1 -419 val_419 1 -42 val_42 1 -421 val_421 1 -424 val_424 1 -427 val_427 1 -429 val_429 1 -43 val_43 1 -430 val_430 1 -431 val_431 1 -432 val_432 1 -435 val_435 1 -436 val_436 1 -437 val_437 1 -438 val_438 1 -439 val_439 1 -44 val_44 1 -443 val_443 1 -444 val_444 1 -446 val_446 1 -448 val_448 1 -449 val_449 1 -452 val_452 1 -453 val_453 1 -454 val_454 1 -455 val_455 1 -457 val_457 1 -458 val_458 1 -459 val_459 1 -460 val_460 1 -462 val_462 1 -463 val_463 1 -466 val_466 1 -467 val_467 1 -468 val_468 1 -469 val_469 1 -47 val_47 1 -470 val_470 1 -472 val_472 1 -475 val_475 1 -477 val_477 1 -478 val_478 1 -479 val_479 1 -480 val_480 1 -481 val_481 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -485 val_485 1 -487 val_487 1 -489 val_489 1 -490 val_490 1 -491 val_491 1 -492 val_492 1 -493 val_493 1 -494 val_494 1 -495 val_495 1 -496 val_496 1 -497 val_497 1 -498 val_498 1 -5 val_5 1 -51 val_51 1 -53 val_53 1 -54 val_54 1 -57 val_57 1 -58 val_58 1 -64 val_64 1 -65 val_65 1 -66 val_66 1 -67 val_67 1 -69 val_69 1 -70 val_70 1 -72 val_72 1 -74 val_74 1 -76 val_76 1 -77 val_77 1 -78 val_78 1 -8 val_8 1 -80 val_80 1 -82 val_82 1 -83 val_83 1 -84 val_84 1 -85 val_85 1 -86 val_86 1 -87 val_87 1 -9 val_9 1 -90 val_90 1 -92 val_92 1 -95 val_95 1 -96 val_96 1 -97 val_97 1 -98 val_98 1 PREHOOK: query: EXPLAIN FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -1744,315 +512,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 
-118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 PREHOOK: query: SELECT DEST2.* FROM DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -2061,315 +520,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -0 val_0 1 -10 val_10 1 -100 val_100 1 -103 val_103 1 -104 val_104 1 -105 val_105 1 -11 val_11 1 -111 val_111 1 -113 val_113 1 -114 val_114 1 -116 val_116 1 -118 val_118 1 -119 val_119 1 -12 val_12 1 -120 val_120 1 -125 val_125 1 -126 val_126 1 -128 val_128 1 -129 val_129 1 -131 val_131 1 -133 val_133 1 -134 val_134 1 -136 val_136 1 -137 val_137 1 -138 val_138 1 -143 val_143 1 -145 val_145 1 -146 val_146 1 -149 val_149 1 -15 val_15 1 -150 val_150 1 -152 val_152 1 -153 val_153 1 -155 val_155 1 -156 val_156 1 -157 val_157 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -163 val_163 1 -164 val_164 1 -165 val_165 1 -166 val_166 1 -167 val_167 1 -168 val_168 1 -169 val_169 1 -17 val_17 1 -170 val_170 1 -172 val_172 1 -174 val_174 1 -175 val_175 1 -176 val_176 1 -177 val_177 1 -178 val_178 1 -179 val_179 1 -18 val_18 1 -180 val_180 1 -181 val_181 1 -183 val_183 1 -186 val_186 1 -187 val_187 1 -189 val_189 1 -19 val_19 1 -190 val_190 1 -191 val_191 1 -192 val_192 1 -193 val_193 1 -194 val_194 1 -195 val_195 1 -196 val_196 1 -197 val_197 1 -199 val_199 1 -2 val_2 1 -20 val_20 1 -200 val_200 1 -201 val_201 1 -202 val_202 1 -203 val_203 1 -205 val_205 1 -207 val_207 1 -208 val_208 1 -209 val_209 1 -213 val_213 1 -214 val_214 1 -216 val_216 1 -217 val_217 1 -218 val_218 1 -219 val_219 1 -221 val_221 1 -222 
val_222 1 -223 val_223 1 -224 val_224 1 -226 val_226 1 -228 val_228 1 -229 val_229 1 -230 val_230 1 -233 val_233 1 -235 val_235 1 -237 val_237 1 -238 val_238 1 -239 val_239 1 -24 val_24 1 -241 val_241 1 -242 val_242 1 -244 val_244 1 -247 val_247 1 -248 val_248 1 -249 val_249 1 -252 val_252 1 -255 val_255 1 -256 val_256 1 -257 val_257 1 -258 val_258 1 -26 val_26 1 -260 val_260 1 -262 val_262 1 -263 val_263 1 -265 val_265 1 -266 val_266 1 -27 val_27 1 -272 val_272 1 -273 val_273 1 -274 val_274 1 -275 val_275 1 -277 val_277 1 -278 val_278 1 -28 val_28 1 -280 val_280 1 -281 val_281 1 -282 val_282 1 -283 val_283 1 -284 val_284 1 -285 val_285 1 -286 val_286 1 -287 val_287 1 -288 val_288 1 -289 val_289 1 -291 val_291 1 -292 val_292 1 -296 val_296 1 -298 val_298 1 -30 val_30 1 -302 val_302 1 -305 val_305 1 -306 val_306 1 -307 val_307 1 -308 val_308 1 -309 val_309 1 -310 val_310 1 -311 val_311 1 -315 val_315 1 -316 val_316 1 -317 val_317 1 -318 val_318 1 -321 val_321 1 -322 val_322 1 -323 val_323 1 -325 val_325 1 -327 val_327 1 -33 val_33 1 -331 val_331 1 -332 val_332 1 -333 val_333 1 -335 val_335 1 -336 val_336 1 -338 val_338 1 -339 val_339 1 -34 val_34 1 -341 val_341 1 -342 val_342 1 -344 val_344 1 -345 val_345 1 -348 val_348 1 -35 val_35 1 -351 val_351 1 -353 val_353 1 -356 val_356 1 -360 val_360 1 -362 val_362 1 -364 val_364 1 -365 val_365 1 -366 val_366 1 -367 val_367 1 -368 val_368 1 -369 val_369 1 -37 val_37 1 -373 val_373 1 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -379 val_379 1 -382 val_382 1 -384 val_384 1 -386 val_386 1 -389 val_389 1 -392 val_392 1 -393 val_393 1 -394 val_394 1 -395 val_395 1 -396 val_396 1 -397 val_397 1 -399 val_399 1 -4 val_4 1 -400 val_400 1 -401 val_401 1 -402 val_402 1 -403 val_403 1 -404 val_404 1 -406 val_406 1 -407 val_407 1 -409 val_409 1 -41 val_41 1 -411 val_411 1 -413 val_413 1 -414 val_414 1 -417 val_417 1 -418 val_418 1 -419 val_419 1 -42 val_42 1 -421 val_421 1 -424 val_424 1 -427 val_427 1 -429 val_429 1 -43 val_43 1 -430 val_430 1 -431 val_431 1 -432 val_432 1 -435 val_435 1 -436 val_436 1 -437 val_437 1 -438 val_438 1 -439 val_439 1 -44 val_44 1 -443 val_443 1 -444 val_444 1 -446 val_446 1 -448 val_448 1 -449 val_449 1 -452 val_452 1 -453 val_453 1 -454 val_454 1 -455 val_455 1 -457 val_457 1 -458 val_458 1 -459 val_459 1 -460 val_460 1 -462 val_462 1 -463 val_463 1 -466 val_466 1 -467 val_467 1 -468 val_468 1 -469 val_469 1 -47 val_47 1 -470 val_470 1 -472 val_472 1 -475 val_475 1 -477 val_477 1 -478 val_478 1 -479 val_479 1 -480 val_480 1 -481 val_481 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -485 val_485 1 -487 val_487 1 -489 val_489 1 -490 val_490 1 -491 val_491 1 -492 val_492 1 -493 val_493 1 -494 val_494 1 -495 val_495 1 -496 val_496 1 -497 val_497 1 -498 val_498 1 -5 val_5 1 -51 val_51 1 -53 val_53 1 -54 val_54 1 -57 val_57 1 -58 val_58 1 -64 val_64 1 -65 val_65 1 -66 val_66 1 -67 val_67 1 -69 val_69 1 -70 val_70 1 -72 val_72 1 -74 val_74 1 -76 val_76 1 -77 val_77 1 -78 val_78 1 -8 val_8 1 -80 val_80 1 -82 val_82 1 -83 val_83 1 -84 val_84 1 -85 val_85 1 -86 val_86 1 -87 val_87 1 -9 val_9 1 -90 val_90 1 -92 val_92 1 -95 val_95 1 -96 val_96 1 -97 val_97 1 -98 val_98 1 PREHOOK: query: EXPLAIN FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -2530,315 +680,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 -116 1 -118 2 -119 3 -12 2 -120 2 -125 2 -126 1 -128 3 -129 2 
-131 1 -133 1 -134 2 -136 1 -137 2 -138 4 -143 1 -145 1 -146 2 -149 2 -15 2 -150 1 -152 2 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 2 -165 2 -166 1 -167 3 -168 1 -169 4 -17 1 -170 1 -172 2 -174 2 -175 2 -176 2 -177 1 -178 1 -179 2 -18 2 -180 1 -181 1 -183 1 -186 1 -187 3 -189 1 -19 1 -190 1 -191 2 -192 1 -193 3 -194 1 -195 2 -196 1 -197 2 -199 3 -2 1 -20 1 -200 2 -201 1 -202 1 -203 2 -205 2 -207 2 -208 3 -209 2 -213 2 -214 1 -216 2 -217 2 -218 1 -219 2 -221 2 -222 1 -223 2 -224 2 -226 1 -228 1 -229 2 -230 5 -233 2 -235 1 -237 2 -238 2 -239 2 -24 2 -241 1 -242 2 -244 1 -247 1 -248 1 -249 1 -252 1 -255 2 -256 2 -257 1 -258 1 -26 2 -260 1 -262 1 -263 1 -265 2 -266 1 -27 1 -272 2 -273 3 -274 1 -275 1 -277 4 -278 2 -28 1 -280 2 -281 2 -282 2 -283 1 -284 1 -285 1 -286 1 -287 1 -288 2 -289 1 -291 1 -292 1 -296 1 -298 3 -30 1 -302 1 -305 1 -306 1 -307 2 -308 1 -309 2 -310 1 -311 3 -315 1 -316 3 -317 2 -318 3 -321 2 -322 2 -323 1 -325 2 -327 3 -33 1 -331 2 -332 1 -333 2 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 2 -344 2 -345 1 -348 5 -35 3 -351 1 -353 2 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 2 -368 1 -369 3 -37 2 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 2 -384 3 -386 1 -389 1 -392 1 -393 1 -394 1 -395 2 -396 3 -397 2 -399 2 -4 1 -400 1 -401 5 -402 1 -403 3 -404 2 -406 4 -407 1 -409 3 -41 1 -411 1 -413 2 -414 2 -417 3 -418 1 -419 1 -42 2 -421 1 -424 2 -427 1 -429 2 -43 1 -430 3 -431 3 -432 1 -435 1 -436 1 -437 1 -438 3 -439 2 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 3 -455 1 -457 1 -458 2 -459 2 -460 1 -462 2 -463 2 -466 3 -467 1 -468 4 -469 5 -47 1 -470 1 -472 1 -475 1 -477 1 -478 2 -479 1 -480 3 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 4 -490 1 -491 1 -492 2 -493 1 -494 1 -495 1 -496 1 -497 1 -498 3 -5 3 -51 2 -53 1 -54 1 -57 1 -58 2 -64 1 -65 1 -66 1 -67 2 -69 1 -70 3 -72 2 -74 1 -76 2 -77 1 -78 1 -8 1 -80 1 -82 1 -83 2 -84 2 -85 1 -86 1 -87 1 -9 1 -90 3 -92 1 -95 2 -96 1 -97 2 -98 2 PREHOOK: query: SELECT DEST2.* FROM DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -2847,315 +688,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -0 val_0 3 -10 val_10 1 -100 val_100 2 -103 val_103 2 -104 val_104 2 -105 val_105 1 -11 val_11 1 -111 val_111 1 -113 val_113 2 -114 val_114 1 -116 val_116 1 -118 val_118 2 -119 val_119 3 -12 val_12 2 -120 val_120 2 -125 val_125 2 -126 val_126 1 -128 val_128 3 -129 val_129 2 -131 val_131 1 -133 val_133 1 -134 val_134 2 -136 val_136 1 -137 val_137 2 -138 val_138 4 -143 val_143 1 -145 val_145 1 -146 val_146 2 -149 val_149 2 -15 val_15 2 -150 val_150 1 -152 val_152 2 -153 val_153 1 -155 val_155 1 -156 val_156 1 -157 val_157 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -163 val_163 1 -164 val_164 2 -165 val_165 2 -166 val_166 1 -167 val_167 3 -168 val_168 1 -169 val_169 4 -17 val_17 1 -170 val_170 1 -172 val_172 2 -174 val_174 2 -175 val_175 2 -176 val_176 2 -177 val_177 1 -178 val_178 1 -179 val_179 2 -18 val_18 2 -180 val_180 1 -181 val_181 1 -183 val_183 1 -186 val_186 1 -187 val_187 3 -189 val_189 1 -19 val_19 1 -190 val_190 1 -191 val_191 2 -192 val_192 1 -193 val_193 3 -194 val_194 1 -195 val_195 2 -196 val_196 1 -197 val_197 2 -199 val_199 3 -2 val_2 1 -20 val_20 1 -200 val_200 2 -201 val_201 1 -202 val_202 1 -203 val_203 2 -205 val_205 2 -207 val_207 2 -208 val_208 3 -209 val_209 2 -213 val_213 2 -214 val_214 1 -216 val_216 2 -217 val_217 2 -218 val_218 1 -219 val_219 2 -221 val_221 2 -222 val_222 1 -223 val_223 2 -224 val_224 2 -226 val_226 1 -228 
val_228 1 -229 val_229 2 -230 val_230 5 -233 val_233 2 -235 val_235 1 -237 val_237 2 -238 val_238 2 -239 val_239 2 -24 val_24 2 -241 val_241 1 -242 val_242 2 -244 val_244 1 -247 val_247 1 -248 val_248 1 -249 val_249 1 -252 val_252 1 -255 val_255 2 -256 val_256 2 -257 val_257 1 -258 val_258 1 -26 val_26 2 -260 val_260 1 -262 val_262 1 -263 val_263 1 -265 val_265 2 -266 val_266 1 -27 val_27 1 -272 val_272 2 -273 val_273 3 -274 val_274 1 -275 val_275 1 -277 val_277 4 -278 val_278 2 -28 val_28 1 -280 val_280 2 -281 val_281 2 -282 val_282 2 -283 val_283 1 -284 val_284 1 -285 val_285 1 -286 val_286 1 -287 val_287 1 -288 val_288 2 -289 val_289 1 -291 val_291 1 -292 val_292 1 -296 val_296 1 -298 val_298 3 -30 val_30 1 -302 val_302 1 -305 val_305 1 -306 val_306 1 -307 val_307 2 -308 val_308 1 -309 val_309 2 -310 val_310 1 -311 val_311 3 -315 val_315 1 -316 val_316 3 -317 val_317 2 -318 val_318 3 -321 val_321 2 -322 val_322 2 -323 val_323 1 -325 val_325 2 -327 val_327 3 -33 val_33 1 -331 val_331 2 -332 val_332 1 -333 val_333 2 -335 val_335 1 -336 val_336 1 -338 val_338 1 -339 val_339 1 -34 val_34 1 -341 val_341 1 -342 val_342 2 -344 val_344 2 -345 val_345 1 -348 val_348 5 -35 val_35 3 -351 val_351 1 -353 val_353 2 -356 val_356 1 -360 val_360 1 -362 val_362 1 -364 val_364 1 -365 val_365 1 -366 val_366 1 -367 val_367 2 -368 val_368 1 -369 val_369 3 -37 val_37 2 -373 val_373 1 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -379 val_379 1 -382 val_382 2 -384 val_384 3 -386 val_386 1 -389 val_389 1 -392 val_392 1 -393 val_393 1 -394 val_394 1 -395 val_395 2 -396 val_396 3 -397 val_397 2 -399 val_399 2 -4 val_4 1 -400 val_400 1 -401 val_401 5 -402 val_402 1 -403 val_403 3 -404 val_404 2 -406 val_406 4 -407 val_407 1 -409 val_409 3 -41 val_41 1 -411 val_411 1 -413 val_413 2 -414 val_414 2 -417 val_417 3 -418 val_418 1 -419 val_419 1 -42 val_42 2 -421 val_421 1 -424 val_424 2 -427 val_427 1 -429 val_429 2 -43 val_43 1 -430 val_430 3 -431 val_431 3 -432 val_432 1 -435 val_435 1 -436 val_436 1 -437 val_437 1 -438 val_438 3 -439 val_439 2 -44 val_44 1 -443 val_443 1 -444 val_444 1 -446 val_446 1 -448 val_448 1 -449 val_449 1 -452 val_452 1 -453 val_453 1 -454 val_454 3 -455 val_455 1 -457 val_457 1 -458 val_458 2 -459 val_459 2 -460 val_460 1 -462 val_462 2 -463 val_463 2 -466 val_466 3 -467 val_467 1 -468 val_468 4 -469 val_469 5 -47 val_47 1 -470 val_470 1 -472 val_472 1 -475 val_475 1 -477 val_477 1 -478 val_478 2 -479 val_479 1 -480 val_480 3 -481 val_481 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -485 val_485 1 -487 val_487 1 -489 val_489 4 -490 val_490 1 -491 val_491 1 -492 val_492 2 -493 val_493 1 -494 val_494 1 -495 val_495 1 -496 val_496 1 -497 val_497 1 -498 val_498 3 -5 val_5 3 -51 val_51 2 -53 val_53 1 -54 val_54 1 -57 val_57 1 -58 val_58 2 -64 val_64 1 -65 val_65 1 -66 val_66 1 -67 val_67 2 -69 val_69 1 -70 val_70 3 -72 val_72 2 -74 val_74 1 -76 val_76 2 -77 val_77 1 -78 val_78 1 -8 val_8 1 -80 val_80 1 -82 val_82 1 -83 val_83 2 -84 val_84 2 -85 val_85 1 -86 val_86 1 -87 val_87 1 -9 val_9 1 -90 val_90 3 -92 val_92 1 -95 val_95 2 -96 val_96 1 -97 val_97 2 -98 val_98 2 PREHOOK: query: EXPLAIN FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -3262,9 +794,13 @@ outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) + expressions: _col1 (type: string), 
_col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -3314,315 +850,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 PREHOOK: query: SELECT DEST2.* FROM DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -3631,312 +858,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -0 val_0 1 -10 val_10 1 -100 val_100 1 -103 val_103 1 -104 val_104 1 -105 val_105 1 -11 val_11 1 -111 val_111 1 -113 val_113 1 -114 val_114 1 -116 val_116 1 -118 val_118 1 -119 val_119 1 -12 val_12 1 -120 val_120 1 -125 val_125 1 -126 val_126 1 -128 val_128 1 -129 val_129 1 -131 val_131 1 -133 val_133 1 -134 val_134 1 -136 val_136 1 -137 val_137 1 -138 val_138 1 -143 val_143 1 -145 val_145 1 -146 val_146 1 -149 val_149 1 -15 val_15 1 -150 val_150 1 -152 val_152 1 -153 val_153 1 -155 val_155 1 -156 val_156 1 -157 val_157 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -163 val_163 1 -164 val_164 1 -165 val_165 
1 -166 val_166 1 -167 val_167 1 -168 val_168 1 -169 val_169 1 -17 val_17 1 -170 val_170 1 -172 val_172 1 -174 val_174 1 -175 val_175 1 -176 val_176 1 -177 val_177 1 -178 val_178 1 -179 val_179 1 -18 val_18 1 -180 val_180 1 -181 val_181 1 -183 val_183 1 -186 val_186 1 -187 val_187 1 -189 val_189 1 -19 val_19 1 -190 val_190 1 -191 val_191 1 -192 val_192 1 -193 val_193 1 -194 val_194 1 -195 val_195 1 -196 val_196 1 -197 val_197 1 -199 val_199 1 -2 val_2 1 -20 val_20 1 -200 val_200 1 -201 val_201 1 -202 val_202 1 -203 val_203 1 -205 val_205 1 -207 val_207 1 -208 val_208 1 -209 val_209 1 -213 val_213 1 -214 val_214 1 -216 val_216 1 -217 val_217 1 -218 val_218 1 -219 val_219 1 -221 val_221 1 -222 val_222 1 -223 val_223 1 -224 val_224 1 -226 val_226 1 -228 val_228 1 -229 val_229 1 -230 val_230 1 -233 val_233 1 -235 val_235 1 -237 val_237 1 -238 val_238 1 -239 val_239 1 -24 val_24 1 -241 val_241 1 -242 val_242 1 -244 val_244 1 -247 val_247 1 -248 val_248 1 -249 val_249 1 -252 val_252 1 -255 val_255 1 -256 val_256 1 -257 val_257 1 -258 val_258 1 -26 val_26 1 -260 val_260 1 -262 val_262 1 -263 val_263 1 -265 val_265 1 -266 val_266 1 -27 val_27 1 -272 val_272 1 -273 val_273 1 -274 val_274 1 -275 val_275 1 -277 val_277 1 -278 val_278 1 -28 val_28 1 -280 val_280 1 -281 val_281 1 -282 val_282 1 -283 val_283 1 -284 val_284 1 -285 val_285 1 -286 val_286 1 -287 val_287 1 -288 val_288 1 -289 val_289 1 -291 val_291 1 -292 val_292 1 -296 val_296 1 -298 val_298 1 -30 val_30 1 -302 val_302 1 -305 val_305 1 -306 val_306 1 -307 val_307 1 -308 val_308 1 -309 val_309 1 -310 val_310 1 -311 val_311 1 -315 val_315 1 -316 val_316 1 -317 val_317 1 -318 val_318 1 -321 val_321 1 -322 val_322 1 -323 val_323 1 -325 val_325 1 -327 val_327 1 -33 val_33 1 -331 val_331 1 -332 val_332 1 -333 val_333 1 -335 val_335 1 -336 val_336 1 -338 val_338 1 -339 val_339 1 -34 val_34 1 -341 val_341 1 -342 val_342 1 -344 val_344 1 -345 val_345 1 -348 val_348 1 -35 val_35 1 -351 val_351 1 -353 val_353 1 -356 val_356 1 -360 val_360 1 -362 val_362 1 -364 val_364 1 -365 val_365 1 -366 val_366 1 -367 val_367 1 -368 val_368 1 -369 val_369 1 -37 val_37 1 -373 val_373 1 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -379 val_379 1 -382 val_382 1 -384 val_384 1 -386 val_386 1 -389 val_389 1 -392 val_392 1 -393 val_393 1 -394 val_394 1 -395 val_395 1 -396 val_396 1 -397 val_397 1 -399 val_399 1 -4 val_4 1 -400 val_400 1 -401 val_401 1 -402 val_402 1 -403 val_403 1 -404 val_404 1 -406 val_406 1 -407 val_407 1 -409 val_409 1 -41 val_41 1 -411 val_411 1 -413 val_413 1 -414 val_414 1 -417 val_417 1 -418 val_418 1 -419 val_419 1 -42 val_42 1 -421 val_421 1 -424 val_424 1 -427 val_427 1 -429 val_429 1 -43 val_43 1 -430 val_430 1 -431 val_431 1 -432 val_432 1 -435 val_435 1 -436 val_436 1 -437 val_437 1 -438 val_438 1 -439 val_439 1 -44 val_44 1 -443 val_443 1 -444 val_444 1 -446 val_446 1 -448 val_448 1 -449 val_449 1 -452 val_452 1 -453 val_453 1 -454 val_454 1 -455 val_455 1 -457 val_457 1 -458 val_458 1 -459 val_459 1 -460 val_460 1 -462 val_462 1 -463 val_463 1 -466 val_466 1 -467 val_467 1 -468 val_468 1 -469 val_469 1 -47 val_47 1 -470 val_470 1 -472 val_472 1 -475 val_475 1 -477 val_477 1 -478 val_478 1 -479 val_479 1 -480 val_480 1 -481 val_481 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -485 val_485 1 -487 val_487 1 -489 val_489 1 -490 val_490 1 -491 val_491 1 -492 val_492 1 -493 val_493 1 -494 val_494 1 -495 val_495 1 -496 val_496 1 -497 val_497 1 -498 val_498 1 -5 val_5 1 -51 val_51 1 -53 val_53 1 -54 val_54 1 -57 val_57 1 -58 
val_58 1 -64 val_64 1 -65 val_65 1 -66 val_66 1 -67 val_67 1 -69 val_69 1 -70 val_70 1 -72 val_72 1 -74 val_74 1 -76 val_76 1 -77 val_77 1 -78 val_78 1 -8 val_8 1 -80 val_80 1 -82 val_82 1 -83 val_83 1 -84 val_84 1 -85 val_85 1 -86 val_86 1 -87 val_87 1 -9 val_9 1 -90 val_90 1 -92 val_92 1 -95 val_95 1 -96 val_96 1 -97 val_97 1 -98 val_98 1 Index: ql/src/test/results/clientpositive/ctas_colname.q.out =================================================================== --- ql/src/test/results/clientpositive/ctas_colname.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/ctas_colname.q.out (working copy) @@ -198,7 +198,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -206,7 +206,7 @@ isPivotResult: true Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -360,7 +360,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lead_window_0 arguments: _col0, 1 name: lead window function: GenericUDAFLeadEvaluator @@ -368,7 +368,7 @@ isPivotResult: true Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: string) + expressions: _col0 (type: string), _col1 (type: string), lead_window_0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Index: ql/src/test/results/clientpositive/groupby4.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby4.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby4.q.out (working copy) @@ -33,10 +33,10 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(key, 1, 1) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -66,7 +66,7 @@ Group By Operator keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Index: ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out =================================================================== --- ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (working copy) @@ -37,12 +37,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08' and hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] @@ -50,13 +56,19 @@ select key, value from srcpart where ds='2008-04-08' and hr='12' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') select key, value from srcpart where ds='2008-04-08' and hr='12' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] @@ -64,13 +76,19 @@ select key, value from srcpart where ds='2008-04-09' and hr='11' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') select key, value from srcpart where ds='2008-04-09' and hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] @@ -78,12 +96,18 @@ select key, value from srcpart where ds='2008-04-09' and hr='12' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') select key, value from srcpart where ds='2008-04-09' and hr='12' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] @@ -129,12 +153,18 @@ PREHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@tstsrcpart +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@tstsrcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@tstsrcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@tstsrcpart +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 0 3 PREHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key Index: ql/src/test/results/clientpositive/groupby3_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby3_noskew.q.out (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/results/clientpositive/groupby3_noskew.q.out (working copy) @@ -46,20 +46,20 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: $f0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0) mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 
(type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/queries/clientpositive/cbo_join.q =================================================================== --- ql/src/test/queries/clientpositive/cbo_join.q (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/test/queries/clientpositive/cbo_join.q (working copy) @@ -4,6 +4,7 @@ set hive.stats.fetch.column.stats=true; set hive.auto.convert.join=false; +-- SORT_QUERY_RESULTS -- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key; select cbo_t1.key from cbo_t1 join cbo_t3; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (working copy) @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -444,4 +445,42 @@ // If the child is also decimal, no cast is needed (we hope - can target type be narrower?). return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory(childTi); } + + /** + * Build ExprNodeColumnDesc for the projections in the input operator from + * startPos to endPos (both included). Operator must have an associated + * colExprMap. + * + * @param inputOp + * Input Hive Operator + * @param startPos + * starting position in the input operator schema; must be >=0 and <= + * endPos + * @param endPos + * end position in the input operator schema; must be >=0.
+ * @return List of ExprNodeDesc + */ + public static ArrayList<ExprNodeDesc> genExprNodeDesc(Operator inputOp, int startPos, int endPos, + boolean addEmptyTabAlias, boolean setColToNonVirtual) { + ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>(); + List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature(); + + String tabAlias; + boolean vc; + ColumnInfo ci; + for (int i = startPos; i <= endPos; i++) { + ci = colInfoLst.get(i); + tabAlias = ci.getTabAlias(); + if (addEmptyTabAlias) { + tabAlias = ""; } + vc = ci.getIsVirtualCol(); + if (setColToNonVirtual) { + vc = false; + } + exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc)); + } + + return exprColLst; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (working copy) @@ -110,6 +110,13 @@ public JoinDesc(final Map<Byte, List<ExprNodeDesc>> exprs, List<String> outputColumnNames, final boolean noOuterJoin, + final JoinCondDesc[] conds, ExprNodeDesc[][] joinKeys) { + this(exprs, outputColumnNames, noOuterJoin, conds, + new HashMap<Byte, List<ExprNodeDesc>>(), joinKeys); + } + + public JoinDesc(final Map<Byte, List<ExprNodeDesc>> exprs, + List<String> outputColumnNames, final boolean noOuterJoin, final JoinCondDesc[] conds, final Map<Byte, List<ExprNodeDesc>> filters, ExprNodeDesc[][] joinKeys) { this.exprs = exprs; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy) @@ -531,14 +531,15 @@ Operator child = op.getChildOperators().get(0); - List<String> childCols; + List<String> childCols = null; if (child instanceof CommonJoinOperator) { - childCols = cppCtx.getJoinPrunedColLists().get(child) + childCols = cppCtx.getJoinPrunedColLists().get(child) == null + ?
null : cppCtx.getJoinPrunedColLists().get(child) .get((byte) conf.getTag()); } else { childCols = cppCtx.getPrunedColList(child); + } - } List<ExprNodeDesc> valCols = conf.getValueCols(); List<String> valColNames = conf.getOutputValueColumnNames(); @@ -749,6 +750,7 @@ conf.setOutputColumnNames(newOutputColumnNames); handleChildren(op, cols, cppCtx); } + return null; } @@ -971,12 +973,12 @@ .getChildOperators(); LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs()); + List<String> childColLists = cppCtx.genColLists(op); if (childColLists == null) { return; } - Map<Byte, List<String>> prunedColLists = new HashMap<Byte, List<String>>(); for (byte tag : conf.getTagOrder()) { prunedColLists.put(tag, new ArrayList<String>()); @@ -1076,6 +1078,7 @@ } LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs()); + op.setColumnExprMap(newColExprMap); conf.setOutputColumnNames(outputCols); op.getSchema().setSignature(rs); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java (working copy) @@ -242,4 +242,4 @@ return null; } } -} +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java (revision 1673601) @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import java.util.List; + +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTrait; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistributionTraitDef; +import org.apache.calcite.util.mapping.Mappings.TargetMapping; + +public class HiveRelDistribution implements RelDistribution { + + List<Integer> keys; + RelDistribution.Type type; + + public HiveRelDistribution(Type type, List<Integer> keys) { + this.type = type; + this.keys = keys; + } + + @Override + public RelTraitDef getTraitDef() { + return RelDistributionTraitDef.INSTANCE; + } + + @Override + public void register(RelOptPlanner planner) { + + } + + @Override + public boolean satisfies(RelTrait trait) { + if (trait == this) { + return true; + } + switch (((RelDistribution)trait).getType()) { + case HASH_DISTRIBUTED : + return this.getKeys().equals(((RelDistribution)trait).getKeys()); + default: + throw new RuntimeException("Other distributions are not used yet."); + } + } + + @Override + public RelDistribution apply(TargetMapping mapping) { + if (keys.isEmpty()) { + return this; + } + return new HiveRelDistribution(type, keys); + } + + @Override + public List<Integer> getKeys() { + return keys; + } + + @Override + public Type getType() { + return type; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelCollation.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelCollation.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelCollation.java (revision 1673601) @@ -0,0 +1,16 @@ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.RelCollationImpl; +import org.apache.calcite.rel.RelFieldCollation; + +import com.google.common.collect.ImmutableList; + +public class HiveRelCollation extends RelCollationImpl { + + public HiveRelCollation(ImmutableList<RelFieldCollation> fieldCollations) { + super(fieldCollations); + } + +} + + Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java (revision 1673601) @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.plan.Context; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; + + +public class HiveConfigContext implements Context { + private HiveAlgorithmsConf config; + + public HiveConfigContext(HiveAlgorithmsConf config) { + this.config = config; + } + + public <T> T unwrap(Class<T> clazz) { + if (clazz.isInstance(config)) { + return clazz.cast(config); + } + return null; + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java (revision 1673601) @@ -0,0 +1,49 @@ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistributionTraitDef; +import org.apache.calcite.rel.RelInput; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.SortExchange; + +public class HiveSortExchange extends SortExchange { + + private HiveSortExchange(RelOptCluster cluster, RelTraitSet traitSet, + RelNode input, RelDistribution distribution, RelCollation collation) { + super(cluster, traitSet, input, distribution, collation); + } + + public HiveSortExchange(RelInput input) { + super(input); + } + + /** + * Creates a HiveSortExchange.
+ * + * @param input Input relational expression + * @param distribution Distribution specification + * @param collation Collation specification + */ + public static HiveSortExchange create(RelNode input, + RelDistribution distribution, RelCollation collation) { + RelOptCluster cluster = input.getCluster(); + distribution = RelDistributionTraitDef.INSTANCE.canonize(distribution); + RelTraitSet traitSet = + input.getTraitSet().replace(Convention.NONE).replace(distribution); + collation = RelCollationTraitDef.INSTANCE.canonize(collation); + return new HiveSortExchange(cluster, traitSet, input, distribution, collation); + } + + @Override + public SortExchange copy(RelTraitSet traitSet, RelNode newInput, RelDistribution newDistribution, + RelCollation newCollation) { + return new HiveSortExchange(getCluster(), traitSet, newInput, + newDistribution, newCollation); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java (working copy) @@ -24,9 +24,9 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; public class HiveFilter extends Filter implements HiveRelNode { @@ -48,7 +48,7 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } /** Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java (working copy) @@ -31,7 +31,6 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.ImmutableBitSet; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; import com.google.common.collect.ImmutableList; @@ -39,6 +38,8 @@ public static final HiveAggRelFactory HIVE_AGGR_REL_FACTORY = new HiveAggRelFactory(); + + public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, boolean indicator, ImmutableBitSet groupSet, List<ImmutableBitSet> groupSets, List<AggregateCall> aggCalls) throws InvalidRelException { @@ -66,7 +67,7 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } @Override @@ -75,6 +76,11 @@ .makeLiteral(true)); } + public boolean isBucketedInput() { + return RelMetadataQuery.distribution(this.getInput()).getKeys().
+ containsAll(groupSet.asList()); + } + private static class HiveAggRelFactory implements AggregateFactory { @Override Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (working copy) @@ -29,6 +29,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -42,7 +43,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; - import com.google.common.collect.ImmutableList; public class HiveProject extends Project implements HiveRelNode { @@ -172,7 +172,7 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (working copy) @@ -17,21 +17,34 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; +import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; + /** * Relational expression representing a scan of a HiveDB collection. * @@ -42,6 +55,14 @@ */ public class HiveTableScan extends TableScan implements HiveRelNode { + private final RelDataType hiveTableScanRowType; + private final ImmutableList<Integer> neededColIndxsFrmReloptHT; + private final String tblAlias; + + public String getTableAlias() { + return tblAlias; + } + + /** * Creates a HiveTableScan.
* @@ -54,10 +75,17 @@ * @param table * HiveDB table */ - public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, - RelDataType rowtype) { + public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, String alias) { + this(cluster, traitSet, table, alias, table.getRowType()); + } + + private HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, String alias, + RelDataType newRowtype) { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table); assert getConvention() == HiveRelNode.CONVENTION; + this.tblAlias = alias; + this.hiveTableScanRowType = newRowtype; + this.neededColIndxsFrmReloptHT = buildNeededColIndxsFrmReloptHT(table.getRowType(), newRowtype); } @Override @@ -66,9 +94,21 @@ return this; } + /** + * Copy TableScan operator with a new Row Schema. The new Row Schema can only + * be a subset of this TS schema. + * + * @param newRowtype + * @return + */ + public HiveTableScan copy(RelDataType newRowtype) { + return new HiveTableScan(getCluster(), getTraitSet(), ((RelOptHiveTable) table), this.tblAlias, + newRowtype); + } + @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } @Override @@ -89,4 +129,62 @@ public List<ColStatistics> getColStat(List<Integer> projIndxLst) { return ((RelOptHiveTable) table).getColStat(projIndxLst); } -} \ No newline at end of file + + @Override + public RelNode project(ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields, + RelFactories.ProjectFactory projectFactory) { + + // 1. If the schema is the same then bail out + final int fieldCount = getRowType().getFieldCount(); + if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount)) && extraFields.isEmpty()) { + return this; + } + + // 2. Make sure there is no dynamic addition of virtual cols + if (extraFields != null && !extraFields.isEmpty()) { + throw new RuntimeException("Hive TS does not support adding virtual columns dynamically"); + } + + // 3. Create new TS schema that is a subset of original + final List<RelDataTypeField> fields = getRowType().getFieldList(); + List<RelDataType> fieldTypes = new LinkedList<RelDataType>(); + List<String> fieldNames = new LinkedList<String>(); + List<RexNode> exprList = new ArrayList<RexNode>(); + RexBuilder rexBuilder = getCluster().getRexBuilder(); + for (int i : fieldsUsed) { + RelDataTypeField field = fields.get(i); + fieldTypes.add(field.getType()); + fieldNames.add(field.getName()); + exprList.add(rexBuilder.makeInputRef(this, i)); + } + + // 4. Build new TS + HiveTableScan newHT = copy(getCluster().getTypeFactory().createStructType(fieldTypes, + fieldNames)); + + // 5.
Add Proj on top of TS + return projectFactory.createProject(newHT, exprList, new ArrayList<String>(fieldNames)); + } + + public List<Integer> getNeededColIndxsFrmReloptHT() { + return neededColIndxsFrmReloptHT; + } + + public RelDataType getPrunedRowType() { + return hiveTableScanRowType; + } + + private static ImmutableList<Integer> buildNeededColIndxsFrmReloptHT(RelDataType htRowtype, + RelDataType scanRowType) { + Builder<Integer> neededColIndxsFrmReloptHTBldr = new ImmutableList.Builder<Integer>(); + Map<String, Integer> colNameToPosInReloptHT = HiveCalciteUtil.getRowColNameIndxMap(htRowtype + .getFieldList()); + List<String> colNamesInScanRowType = scanRowType.getFieldNames(); + + for (int i = 0; i < colNamesInScanRowType.size(); i++) { + neededColIndxsFrmReloptHTBldr.add(colNameToPosInReloptHT.get(colNamesInScanRowType.get(i))); + } + + return neededColIndxsFrmReloptHTBldr.build(); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java (working copy) @@ -25,9 +25,9 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; public class HiveLimit extends SingleRel implements HiveRelNode { private final RexNode offset; @@ -52,6 +52,6 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java (working copy) @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.Set; import org.apache.calcite.plan.RelOptCluster; @@ -25,7 +27,11 @@ import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.RelFactories.JoinFactory; @@ -33,38 +39,38 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import
org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel.JoinAlgorithm; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel.DefaultJoinAlgorithm; +import com.google.common.collect.ImmutableList; + //TODO: Should we convert MultiJoin to be a child of HiveJoin public class HiveJoin extends Join implements HiveRelNode { - // NOTE: COMMON_JOIN & SMB_JOIN are Sort Merge Join (in case of COMMON_JOIN - // each parallel computation handles multiple splits where as in case of SMB - // each parallel computation handles one bucket). MAP_JOIN and BUCKET_JOIN is - // hash joins where MAP_JOIN keeps the whole data set of non streaming tables - // in memory where as BUCKET_JOIN keeps only the b - public enum JoinAlgorithm { - NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN - } + public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); + public enum MapJoinStreamingRelation { NONE, LEFT_RELATION, RIGHT_RELATION } - public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); - private final boolean leftSemiJoin; - private final JoinAlgorithm joinAlgorithm; - //This will be used once we do Join Algorithm selection - @SuppressWarnings("unused") - private final MapJoinStreamingRelation mapJoinStreamingSide = MapJoinStreamingRelation.NONE; + private final JoinPredicateInfo joinPredInfo; + private JoinAlgorithm joinAlgorithm; + private RelOptCost joinCost; + public static HiveJoin getJoin(RelOptCluster cluster, RelNode left, RelNode right, RexNode condition, JoinRelType joinType, boolean leftSemiJoin) { try { Set<String> variablesStopped = Collections.emptySet(); - return new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped, - JoinAlgorithm.NONE, null, leftSemiJoin); + HiveJoin join = new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped, + DefaultJoinAlgorithm.INSTANCE, leftSemiJoin); + return join; } catch (InvalidRelException e) { throw new RuntimeException(e); } @@ -72,10 +78,10 @@ protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right, RexNode condition, JoinRelType joinType, Set<String> variablesStopped, - JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin) - throws InvalidRelException { + JoinAlgorithm joinAlgo, boolean leftSemiJoin) throws InvalidRelException { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType, variablesStopped); + this.joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(this); this.joinAlgorithm = joinAlgo; this.leftSemiJoin = leftSemiJoin; } @@ -90,7 +96,7 @@ try { Set<String> variablesStopped = Collections.emptySet(); return new HiveJoin(getCluster(), traitSet, left, right, conditionExpr, joinType, - variablesStopped, JoinAlgorithm.NONE, null, leftSemiJoin); + variablesStopped, joinAlgorithm, leftSemiJoin); } catch (InvalidRelException e) { // Semantic error not possible. Must be a bug. Convert to // internal error.
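The next hunk adds a getStreamingSide() method that derives the map-join streaming side from memory estimates. As a reading aid, here is a minimal standalone sketch of that size rule, assuming the semantics of the method as written below; the helper and enum names here are illustrative and are not part of the patch:

    // Illustrative sketch only: the smaller input is buffered in memory,
    // so the opposite relation is the one that gets streamed. Sizes come
    // from RelMetadataQuery.memory() in the patch and may be null (unknown).
    enum StreamingSide { NONE, LEFT_RELATION, RIGHT_RELATION }

    static StreamingSide pickStreamingSide(Double leftSize, Double rightSize) {
      if (leftSize == null && rightSize == null) {
        return StreamingSide.NONE;           // no estimates: cannot decide
      }
      if (rightSize == null || (leftSize != null && leftSize < rightSize)) {
        return StreamingSide.RIGHT_RELATION; // left is smaller: stream the right
      }
      return StreamingSide.LEFT_RELATION;    // right is smaller or equal: stream the left
    }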
@@ -98,10 +104,99 @@ } } - public JoinAlgorithm getJoinAlgorithm() { - return joinAlgorithm; + public JoinPredicateInfo getJoinPredicateInfo() { + return joinPredInfo; } + public void setJoinAlgorithm(JoinAlgorithm joinAlgorithm) { + this.joinAlgorithm = joinAlgorithm; + } + + public String getJoinAlgorithmName() { + return joinAlgorithm.getName(); + } + + public ImmutableList<RelCollation> getCollation() { + return joinAlgorithm.getCollation(this); + } + + public RelDistribution getDistribution() { + return joinAlgorithm.getDistribution(this); + } + + public Double getMemory() { + return joinAlgorithm.getMemory(this); + } + + public Double getCumulativeMemoryWithinPhaseSplit() { + return joinAlgorithm.getCumulativeMemoryWithinPhaseSplit(this); + } + + public Boolean isPhaseTransition() { + return joinAlgorithm.isPhaseTransition(this); + } + + public Integer getSplitCount() { + return joinAlgorithm.getSplitCount(this); + } + + public MapJoinStreamingRelation getStreamingSide() { + Double leftInputSize = RelMetadataQuery.memory(left); + Double rightInputSize = RelMetadataQuery.memory(right); + if (leftInputSize == null && rightInputSize == null) { + return MapJoinStreamingRelation.NONE; + } else if (leftInputSize != null && + (rightInputSize == null || + (leftInputSize < rightInputSize))) { + return MapJoinStreamingRelation.RIGHT_RELATION; + } else if (rightInputSize != null && + (leftInputSize == null || + (rightInputSize <= leftInputSize))) { + return MapJoinStreamingRelation.LEFT_RELATION; + } + return MapJoinStreamingRelation.NONE; + } + + public RelNode getStreamingInput() { + MapJoinStreamingRelation mapJoinStreamingSide = getStreamingSide(); + RelNode smallInput; + if (mapJoinStreamingSide == MapJoinStreamingRelation.LEFT_RELATION) { + smallInput = this.getRight(); + } else if (mapJoinStreamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { + smallInput = this.getLeft(); + } else { + smallInput = null; + } + return smallInput; + } + + public ImmutableBitSet getSortedInputs() { + ImmutableBitSet.Builder sortedInputsBuilder = new ImmutableBitSet.Builder(); + JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.
+ constructJoinPredicateInfo(this); + List joinKeysInChildren = new ArrayList(); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema())); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema())); + + for (int i=0; i cardinalities, + ImmutableBitSet sorted) { + // Sort-merge join + double cpuCost = 0.0; + for (int i=0; i> relationInfos) { + // Sort-merge join + double ioCost = 0.0; + for (Pair relationInfo : relationInfos) { + ioCost += computeSortIOCost(relationInfo); + } + return ioCost; + } + + public static double computeSortIOCost(Pair relationInfo) { + // Sort-merge join + double ioCost = 0.0; + double cardinality = relationInfo.left; + double averageTupleSize = relationInfo.right; + // Write cost + ioCost += cardinality * averageTupleSize * LOCAL_WRITE_COST; + // Read cost + ioCost += cardinality * averageTupleSize * LOCAL_READ_COST; + // Net transfer cost + ioCost += cardinality * averageTupleSize * NET_COST; + return ioCost; + } + + public static double computeMapJoinCPUCost( + ImmutableList cardinalities, + ImmutableBitSet streaming) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i cardinalities, + ImmutableBitSet streaming) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i cardinalities) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i maxSize) { + return false; + } + return true; + } + return false; + } + + public static ImmutableList getJoinCollation(JoinPredicateInfo joinPredInfo, + MapJoinStreamingRelation streamingRelation) { + // Compute collations + ImmutableList.Builder collationListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder leftCollationListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder rightCollationListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. 
+ getEquiJoinPredicateElements().get(i); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { + final RelFieldCollation leftFieldCollation = new RelFieldCollation(leftPos); + collationListBuilder.add(leftFieldCollation); + leftCollationListBuilder.add(leftFieldCollation); + } + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { + final RelFieldCollation rightFieldCollation = new RelFieldCollation(rightPos); + collationListBuilder.add(rightFieldCollation); + rightCollationListBuilder.add(rightFieldCollation); + } + } + + // Return join collations + final ImmutableList collation; + switch (streamingRelation) { + case LEFT_RELATION: + collation = ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(leftCollationListBuilder.build()))); + break; + case RIGHT_RELATION: + collation = ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(rightCollationListBuilder.build()))); + break; + default: + collation = ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(collationListBuilder.build()))); + break; + } + return collation; + } + + public static RelDistribution getJoinRedistribution(JoinPredicateInfo joinPredInfo) { + // Compute distribution + ImmutableList.Builder keysListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. + getEquiJoinPredicateElements().get(i); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { + keysListBuilder.add(leftPos); + } + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { + keysListBuilder.add(rightPos); + } + } + return new HiveRelDistribution( + RelDistribution.Type.HASH_DISTRIBUTED, keysListBuilder.build()); + } + + public static RelDistribution getJoinDistribution(JoinPredicateInfo joinPredInfo, + MapJoinStreamingRelation streamingRelation) { + // Compute distribution + ImmutableList.Builder leftKeysListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder rightKeysListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. 
+ getEquiJoinPredicateElements().get(i); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { + leftKeysListBuilder.add(leftPos); + } + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { + rightKeysListBuilder.add(rightPos); + } + } + + RelDistribution distribution = null; + // Keep buckets from the streaming relation + if (streamingRelation == MapJoinStreamingRelation.LEFT_RELATION) { + distribution = new HiveRelDistribution( + RelDistribution.Type.HASH_DISTRIBUTED, leftKeysListBuilder.build()); + } else if (streamingRelation == MapJoinStreamingRelation.RIGHT_RELATION) { + distribution = new HiveRelDistribution( + RelDistribution.Type.HASH_DISTRIBUTED, rightKeysListBuilder.build()); + } + + return distribution; + } + + public static Double getJoinMemory(HiveJoin join) { + return getJoinMemory(join, join.getStreamingSide()); + } + + public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation streamingSide) { + Double memory = 0.0; + if (streamingSide == MapJoinStreamingRelation.NONE || + streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { + // Left side + final Double leftAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double leftRowCount = RelMetadataQuery.getRowCount(join.getLeft()); + if (leftAvgRowSize == null || leftRowCount == null) { + return null; + } + memory += leftAvgRowSize * leftRowCount; + } + if (streamingSide == MapJoinStreamingRelation.NONE || + streamingSide == MapJoinStreamingRelation.LEFT_RELATION) { + // Right side + final Double rightAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + final Double rightRowCount = RelMetadataQuery.getRowCount(join.getRight()); + if (rightAvgRowSize == null || rightRowCount == null) { + return null; + } + memory += rightAvgRowSize * rightRowCount; + } + return memory; + } + + public static Integer getSplitCountWithRepartition(HiveJoin join) { + final Double maxSplitSize = join.getCluster().getPlanner().getContext(). + unwrap(HiveAlgorithmsConf.class).getMaxSplitSize(); + // We repartition: new number of splits + final Double averageRowSize = RelMetadataQuery.getAverageRowSize(join); + final Double rowCount = RelMetadataQuery.getRowCount(join); + if (averageRowSize == null || rowCount == null) { + return null; + } + final Double totalSize = averageRowSize * rowCount; + final Double splitCount = totalSize / maxSplitSize; + return splitCount.intValue(); + } + + public static Integer getSplitCountWithoutRepartition(HiveJoin join) { + RelNode largeInput; + if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { + largeInput = join.getLeft(); + } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { + largeInput = join.getRight(); + } else { + return null; + } + return RelMetadataQuery.splitCount(largeInput); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java (revision 1673601) @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import java.util.Set; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelDistribution; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +/** + * Cost model interface. + */ +public abstract class HiveCostModel { + + private static final Log LOG = LogFactory.getLog(HiveCostModel.class); + + private final Set<JoinAlgorithm> joinAlgorithms; + + + public HiveCostModel(Set<JoinAlgorithm> joinAlgorithms) { + this.joinAlgorithms = joinAlgorithms; + } + + public abstract RelOptCost getDefaultCost(); + + public abstract RelOptCost getAggregateCost(HiveAggregate aggregate); + + public RelOptCost getJoinCost(HiveJoin join) { + // Select algorithm with min cost + JoinAlgorithm joinAlgorithm = null; + RelOptCost minJoinCost = null; + + if (LOG.isDebugEnabled()) { + LOG.debug("Join algorithm selection for:\n" + RelOptUtil.toString(join)); + } + + for (JoinAlgorithm possibleAlgorithm : this.joinAlgorithms) { + if (!possibleAlgorithm.isExecutable(join)) { + continue; + } + RelOptCost joinCost = possibleAlgorithm.getCost(join); + if (LOG.isDebugEnabled()) { + LOG.debug(possibleAlgorithm + " cost: " + joinCost); + } + if (minJoinCost == null || joinCost.isLt(minJoinCost)) { + joinAlgorithm = possibleAlgorithm; + minJoinCost = joinCost; + } + } + + if (LOG.isDebugEnabled()) { + LOG.debug(joinAlgorithm + " selected"); + } + + join.setJoinAlgorithm(joinAlgorithm); + join.setJoinCost(minJoinCost); + + return minJoinCost; + } + + /** + * Interface for join algorithm. + */ + public interface JoinAlgorithm { + public String getName(); + public boolean isExecutable(HiveJoin join); + public RelOptCost getCost(HiveJoin join); + public ImmutableList<RelCollation> getCollation(HiveJoin join); + public RelDistribution getDistribution(HiveJoin join); + public Double getMemory(HiveJoin join); + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join); + public Boolean isPhaseTransition(HiveJoin join); + public Integer getSplitCount(HiveJoin join); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java (revision 1673601) @@ -0,0 +1,586 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistribution.Type; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; + +/** + * Cost model for Tez execution engine. + */ +public class HiveOnTezCostModel extends HiveCostModel { + + public static final HiveOnTezCostModel INSTANCE = + new HiveOnTezCostModel(); + + private HiveOnTezCostModel() { + super(Sets.newHashSet( + TezCommonJoinAlgorithm.INSTANCE, + TezMapJoinAlgorithm.INSTANCE, + TezBucketJoinAlgorithm.INSTANCE, + TezSMBJoinAlgorithm.INSTANCE)); + } + + @Override + public RelOptCost getDefaultCost() { + return HiveCost.FACTORY.makeZeroCost(); + } + + @Override + public RelOptCost getAggregateCost(HiveAggregate aggregate) { + if (aggregate.isBucketedInput()) { + return HiveCost.FACTORY.makeZeroCost(); + } else { + // 1. Sum of input cardinalities + final Double rCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + if (rCount == null) { + return null; + } + // 2. CPU cost = sorting cost + final double cpuCost = HiveAlgorithmsUtil.computeSortCPUCost(rCount); + // 3. IO cost = cost of writing intermediary results to local FS + + // cost of reading from local FS for transferring to GBy + + // cost of transferring map outputs to GBy operator + final Double rAverageSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); + if (rAverageSize == null) { + return null; + } + final double ioCost = HiveAlgorithmsUtil.computeSortIOCost(new Pair<Double,Double>(rCount,rAverageSize)); + // 4. Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + } + + /** + * COMMON_JOIN is Sort Merge Join. Each parallel computation handles multiple + * splits.
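Before the algorithm classes, a hedged back-of-the-envelope on the common (sort-merge) join cost: the method names in HiveAlgorithmsUtil suggest the usual model of an n*log(n) sort per unsorted input plus a linear merge, but the exact formulas are elided in this copy of the patch, so the arithmetic below is illustrative only:

    // Illustrative arithmetic, not the patch's exact formula: two unsorted
    // inputs of 1,000 rows each under an n*log2(n) sort + linear merge model.
    double n = 1000d;
    double sortWork  = 2 * n * (Math.log(n) / Math.log(2)); // ~19,932 comparison units
    double mergeWork = 2 * n;                               // one linear pass over both inputs
    double totalCpu  = sortWork + mergeWork;                // ~21,932 units before constants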
+ */ + public static class TezCommonJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezCommonJoinAlgorithm(); + private static final String ALGORITHM_NAME = "CommonJoin"; + + + @Override + public String getName() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + return true; + } + + @Override + public RelOptCost getCost(HiveJoin join) { + // 1. Sum of input cardinalities + final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + if (leftRCount == null || rightRCount == null) { + return null; + } + final double rCount = leftRCount + rightRCount; + // 2. CPU cost = sorting cost (for each relation) + + // total merge cost + ImmutableList<Double> cardinalities = new ImmutableList.Builder<Double>(). + add(leftRCount). + add(rightRCount). + build(); + final double cpuCost = HiveAlgorithmsUtil.computeSortMergeCPUCost(cardinalities, join.getSortedInputs()); + // 3. IO cost = cost of writing intermediary results to local FS + + // cost of reading from local FS for transferring to join + + // cost of transferring map outputs to Join operator + final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + if (leftRAverageSize == null || rightRAverageSize == null) { + return null; + } + ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>(). + add(new Pair<Double,Double>(leftRCount,leftRAverageSize)). + add(new Pair<Double,Double>(rightRCount,rightRAverageSize)). + build(); + final double ioCost = HiveAlgorithmsUtil.computeSortMergeIOCost(relationInfos); + // 4. Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + + @Override + public ImmutableList<RelCollation> getCollation(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), + MapJoinStreamingRelation.NONE); + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinRedistribution(join.getJoinPredicateInfo()); + } + + @Override + public Double getMemory(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinMemory(join, MapJoinStreamingRelation.NONE); + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + final Double memoryWithinPhase = + RelMetadataQuery.cumulativeMemoryWithinPhase(join); + final Integer splitCount = RelMetadataQuery.splitCount(join); + if (memoryWithinPhase == null || splitCount == null) { + return null; + } + return memoryWithinPhase / splitCount; + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return true; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return HiveAlgorithmsUtil.getSplitCountWithRepartition(join); + } + } + + /** + * MAP_JOIN is a hash join that keeps the whole data set of non streaming tables + * in memory. + */ + public static class TezMapJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezMapJoinAlgorithm(); + private static final String ALGORITHM_NAME = "MapJoin"; + + + @Override + public String getName() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + final Double maxMemory = join.getCluster().getPlanner().getContext().
+ unwrap(HiveAlgorithmsConf.class).getMaxMemory(); + // Check streaming side + RelNode smallInput = join.getStreamingInput(); + if (smallInput == null) { + return false; + } + return HiveAlgorithmsUtil.isFittingIntoMemory(maxMemory, smallInput, 1); + } + + @Override + public RelOptCost getCost(HiveJoin join) { + // 1. Sum of input cardinalities + final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + if (leftRCount == null || rightRCount == null) { + return null; + } + final double rCount = leftRCount + rightRCount; + // 2. CPU cost = HashTable construction cost + + // join cost + ImmutableList<Double> cardinalities = new ImmutableList.Builder<Double>(). + add(leftRCount). + add(rightRCount). + build(); + ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder(); + switch (join.getStreamingSide()) { + case LEFT_RELATION: + streamingBuilder.set(0); + break; + case RIGHT_RELATION: + streamingBuilder.set(1); + break; + default: + return null; + } + ImmutableBitSet streaming = streamingBuilder.build(); + final double cpuCost = HiveAlgorithmsUtil.computeMapJoinCPUCost(cardinalities, streaming); + // 3. IO cost = cost of transferring small tables to join node * + // degree of parallelism + final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + if (leftRAverageSize == null || rightRAverageSize == null) { + return null; + } + ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>(). + add(new Pair<Double,Double>(leftRCount,leftRAverageSize)). + add(new Pair<Double,Double>(rightRCount,rightRAverageSize)). + build(); + final int parallelism = RelMetadataQuery.splitCount(join) == null + ? 1 : RelMetadataQuery.splitCount(join); + final double ioCost = HiveAlgorithmsUtil.computeMapJoinIOCost(relationInfos, streaming, parallelism); + // 4.
Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + + @Override + public ImmutableList<RelCollation> getCollation(HiveJoin join) { + if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION + && join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) { + return null; + } + return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), + join.getStreamingSide()); + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION + && join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) { + return null; + } + return HiveAlgorithmsUtil.getJoinDistribution(join.getJoinPredicateInfo(), + join.getStreamingSide()); + } + + @Override + public Double getMemory(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinMemory(join); + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + // Check streaming side + RelNode inMemoryInput; + if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { + inMemoryInput = join.getRight(); + } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { + inMemoryInput = join.getLeft(); + } else { + return null; + } + // If simple map join, the whole relation goes in memory + return RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput); + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return false; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return HiveAlgorithmsUtil.getSplitCountWithoutRepartition(join); + } + } + + /** + * BUCKET_JOIN is a hash join where one bucket of the non streaming tables + * is kept in memory at a time. + */ + public static class TezBucketJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezBucketJoinAlgorithm(); + private static final String ALGORITHM_NAME = "BucketJoin"; + + + @Override + public String getName() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + final Double maxMemory = join.getCluster().getPlanner().getContext(). + unwrap(HiveAlgorithmsConf.class).getMaxMemory(); + // Check streaming side + RelNode smallInput = join.getStreamingInput(); + if (smallInput == null) { + return false; + } + // Get key columns + JoinPredicateInfo joinPredInfo = join.getJoinPredicateInfo(); + List<ImmutableIntList> joinKeysInChildren = new ArrayList<ImmutableIntList>(); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema())); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema())); + + // Requirements: for Bucket, bucketed by their keys on both sides and fitting in memory + // Obtain number of buckets + Integer buckets = RelMetadataQuery.splitCount(smallInput); + if (buckets == null) { + return false; + } + if (!HiveAlgorithmsUtil.isFittingIntoMemory(maxMemory, smallInput, buckets)) { + return false; + } + for (int i=0; i cardinalities = new ImmutableList.Builder<Double>(). + add(leftRCount). + add(rightRCount).
+ build(); + ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder(); + switch (join.getStreamingSide()) { + case LEFT_RELATION: + streamingBuilder.set(0); + break; + case RIGHT_RELATION: + streamingBuilder.set(1); + break; + default: + return null; + } + ImmutableBitSet streaming = streamingBuilder.build(); + final double cpuCost = HiveAlgorithmsUtil.computeBucketMapJoinCPUCost(cardinalities, streaming); + // 3. IO cost = cost of transferring small tables to join node * + // degree of parallelism + final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + if (leftRAverageSize == null || rightRAverageSize == null) { + return null; + } + ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>(). + add(new Pair<Double,Double>(leftRCount,leftRAverageSize)). + add(new Pair<Double,Double>(rightRCount,rightRAverageSize)). + build(); + final int parallelism = RelMetadataQuery.splitCount(join) == null + ? 1 : RelMetadataQuery.splitCount(join); + final double ioCost = HiveAlgorithmsUtil.computeBucketMapJoinIOCost(relationInfos, streaming, parallelism); + // 4. Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + + @Override + public ImmutableList<RelCollation> getCollation(HiveJoin join) { + if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION + && join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) { + return null; + } + return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), + join.getStreamingSide()); + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinRedistribution(join.getJoinPredicateInfo()); + } + + @Override + public Double getMemory(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinMemory(join); + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + // Check streaming side + RelNode inMemoryInput; + if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { + inMemoryInput = join.getRight(); + } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { + inMemoryInput = join.getLeft(); + } else { + return null; + } + // If bucket map join, only a split goes in memory + final Double memoryInput = + RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput); + final Integer splitCount = RelMetadataQuery.splitCount(inMemoryInput); + if (memoryInput == null || splitCount == null) { + return null; + } + return memoryInput / splitCount; + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return false; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return HiveAlgorithmsUtil.getSplitCountWithoutRepartition(join); + } + } + + /** + * SMB_JOIN is a Sort Merge Join. Each parallel computation handles one bucket. + */ + public static class TezSMBJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezSMBJoinAlgorithm(); + private static final String ALGORITHM_NAME = "SMBJoin"; + + + @Override + public String getName() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + // Requirements: for SMB, sorted by their keys on both sides and bucketed.
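The sortedness requirement that isExecutable() checks next is easiest to see in isolation. A hedged sketch, assuming the Calcite imports already used in this file plus java.util.HashSet/Set; this helper is not the patch's loop body, whose exact form is not recoverable from this copy:

    // Hypothetical helper: an input qualifies for SMB join if some collation
    // reported by the metadata layer covers every one of its join-key columns.
    static boolean sortedOnKeys(RelNode input, ImmutableIntList keys) {
      for (RelCollation collation : RelMetadataQuery.collations(input)) {
        Set<Integer> collated = new HashSet<Integer>();
        for (RelFieldCollation fc : collation.getFieldCollations()) {
          collated.add(fc.getFieldIndex());
        }
        if (collated.containsAll(keys)) {
          return true;  // sorted on all join keys under this collation
        }
      }
      return false;
    }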
+      // Get key columns
+      JoinPredicateInfo joinPredInfo = join.getJoinPredicateInfo();
+      List<ImmutableIntList> joinKeysInChildren = new ArrayList<ImmutableIntList>();
+      joinKeysInChildren.add(
+          ImmutableIntList.copyOf(
+              joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()));
+      joinKeysInChildren.add(
+          ImmutableIntList.copyOf(
+              joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()));
+
+      for (int i = 0; i < joinKeysInChildren.size(); i++) {
+        RelNode input = join.getInputs().get(i);
+        // Is the input bucketed (hash distributed) by its join keys?
+        RelDistribution distribution = RelMetadataQuery.distribution(input);
+        if (distribution == null
+            || !distribution.getKeys().containsAll(joinKeysInChildren.get(i))) {
+          return false;
+        }
+        // Is the input sorted by its join keys?
+        boolean sortedByKeys = false;
+        for (RelCollation collation : RelMetadataQuery.collations(input)) {
+          List<Integer> sortCols = new ArrayList<Integer>();
+          for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
+            sortCols.add(fieldCollation.getFieldIndex());
+          }
+          if (sortCols.containsAll(joinKeysInChildren.get(i))) {
+            sortedByKeys = true;
+            break;
+          }
+        }
+        if (!sortedByKeys) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    @Override
+    public RelOptCost getCost(HiveJoin join) {
+      // 1. Sum of input cardinalities
+      final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
+      final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+      if (leftRCount == null || rightRCount == null) {
+        return null;
+      }
+      final double rCount = leftRCount + rightRCount;
+      // 2. CPU cost = join cost (inputs are already sorted and bucketed)
+      ImmutableList<Double> cardinalities = new ImmutableList.Builder<Double>().
+          add(leftRCount).
+          add(rightRCount).
+          build();
+      ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
+      switch (join.getStreamingSide()) {
+        case LEFT_RELATION:
+          streamingBuilder.set(0);
+          break;
+        case RIGHT_RELATION:
+          streamingBuilder.set(1);
+          break;
+        default:
+          return null;
+      }
+      ImmutableBitSet streaming = streamingBuilder.build();
+      final double cpuCost = HiveAlgorithmsUtil.computeSMBMapJoinCPUCost(cardinalities);
+      // 3. IO cost = cost of transferring small tables to join node *
+      //              degree of parallelism
+      final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
+      final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
+      if (leftRAverageSize == null || rightRAverageSize == null) {
+        return null;
+      }
+      ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>().
+          add(new Pair<Double,Double>(leftRCount, leftRAverageSize)).
+          add(new Pair<Double,Double>(rightRCount, rightRAverageSize)).
+          build();
+      final int parallelism = RelMetadataQuery.splitCount(join) == null
+          ? 1 : RelMetadataQuery.splitCount(join);
+      final double ioCost = HiveAlgorithmsUtil.computeSMBMapJoinIOCost(relationInfos, streaming, parallelism);
+      // 4. Result
+      return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
+    }
+
+    @Override
+    public ImmutableList<RelCollation> getCollation(HiveJoin join) {
+      return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(),
+          MapJoinStreamingRelation.NONE);
+    }
+
+    @Override
+    public RelDistribution getDistribution(HiveJoin join) {
+      return HiveAlgorithmsUtil.getJoinRedistribution(join.getJoinPredicateInfo());
+    }
+
+    @Override
+    public Double getMemory(HiveJoin join) {
+      return 0.0;
+    }
+
+    @Override
+    public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
+      final Double memoryWithinPhase =
+          RelMetadataQuery.cumulativeMemoryWithinPhase(join);
+      final Integer splitCount = RelMetadataQuery.splitCount(join);
+      if (memoryWithinPhase == null || splitCount == null) {
+        return null;
+      }
+      return memoryWithinPhase / splitCount;
+    }
+
+    @Override
+    public Boolean isPhaseTransition(HiveJoin join) {
+      return false;
+    }
+
+    @Override
+    public Integer getSplitCount(HiveJoin join) {
+      return HiveAlgorithmsUtil.getSplitCountWithoutRepartition(join);
+    }
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java	(revision 1673601)
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdPercentageOriginalRows; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +/** + * HiveRelMdCost supplies the implementation of cost model. + */ +public class HiveRelMdCost { + + private final HiveCostModel hiveCostModel; + + public HiveRelMdCost(HiveCostModel hiveCostModel) { + this.hiveCostModel = hiveCostModel; + } + + public RelMetadataProvider getMetadataProvider() { + return ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource(this, + BuiltInMethod.NON_CUMULATIVE_COST.method), + RelMdPercentageOriginalRows.SOURCE)); + } + + public RelOptCost getNonCumulativeCost(HiveAggregate aggregate) { + return hiveCostModel.getAggregateCost(aggregate); + } + + public RelOptCost getNonCumulativeCost(HiveJoin join) { + return hiveCostModel.getJoinCost(join); + } + + // Default case + public RelOptCost getNonCumulativeCost(RelNode rel) { + return hiveCostModel.getDefaultCost(); + } + +} + +// End HiveRelMdCost.java Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java (revision 1673601) @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; + +/** + * Default implementation of the cost model. + * Currently used by MR and Spark execution engines. + */ +public class HiveDefaultCostModel extends HiveCostModel { + + public static final HiveDefaultCostModel INSTANCE = + new HiveDefaultCostModel(); + + private HiveDefaultCostModel() { + super(Sets.newHashSet(DefaultJoinAlgorithm.INSTANCE)); + } + + @Override + public RelOptCost getDefaultCost() { + return HiveCost.FACTORY.makeZeroCost(); + } + + @Override + public RelOptCost getAggregateCost(HiveAggregate aggregate) { + return HiveCost.FACTORY.makeZeroCost(); + } + + + /** + * Default join algorithm. Cost is based on cardinality. + */ + public static class DefaultJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new DefaultJoinAlgorithm(); + private static final String ALGORITHM_NAME = "None"; + + + @Override + public String getName() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + return true; + } + + @Override + public RelOptCost getCost(HiveJoin join) { + double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); + double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0); + } + + @Override + public ImmutableList getCollation(HiveJoin join) { + return null; + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + return null; + } + + @Override + public Double getMemory(HiveJoin join) { + return null; + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + return null; + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return false; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return null; + } + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java (revision 1673601) @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
+
+public class HiveAlgorithmsConf {
+
+  private final Double maxSplitSize;
+  private final Double maxMemory;
+
+  public HiveAlgorithmsConf(Double maxSplitSize, Double maxMemory) {
+    this.maxSplitSize = maxSplitSize;
+    this.maxMemory = maxMemory;
+  }
+
+  public Double getMaxSplitSize() {
+    return maxSplitSize;
+  }
+
+  public Double getMaxMemory() {
+    return maxMemory;
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java	(working copy)
@@ -90,22 +90,17 @@
     return io;
   }
 
-  // TODO: If two cost is equal, could we do any better than comparing
-  // cardinality (may be some other heuristics to break the tie)
   public boolean isLe(RelOptCost other) {
-    return this == other || this.rowCount <= other.getRows();
-    /*
-     * if (((this.dCpu + this.dIo) < (other.getCpu() + other.getIo())) ||
-     * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()) && this.dRows
-     * <= other.getRows())) { return true; } else { return false; }
-     */
+    if ((this.cpu + this.io < other.getCpu() + other.getIo()) ||
+        ((this.cpu + this.io == other.getCpu() + other.getIo()) &&
+            (this.rowCount <= other.getRows()))) {
+      return true;
+    }
+    return false;
+  }
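+    // e.g. a plan with (rows=100, cpu=10, io=10) is now preferred over one
+    // with (rows=50, cpu=30, io=30): total resource usage decides, and
+    // cardinality only breaks ties (previously row count alone decided).
+    // The concrete numbers are illustrative only.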
 
   public boolean isLt(RelOptCost other) {
-    return this.rowCount < other.getRows();
-    /*
-     * return isLe(other) && !equals(other);
-     */
+    return isLe(other) && !equals(other);
   }
 
   public double getRows() {
@@ -113,21 +108,14 @@
   }
 
   public boolean equals(RelOptCost other) {
-    return (this == other) || ((this.rowCount) == (other.getRows()));
-
-    /*
-     * //TODO: should we consider cardinality as well?
-     * return (this == other) ||
-     * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()));
-     */
+    return (this == other) ||
+        ((this.cpu + this.io == other.getCpu() + other.getIo()) &&
+            (this.rowCount == other.getRows()));
   }
 
   public boolean isEqWithEpsilon(RelOptCost other) {
-    return (this == other) || (Math.abs((this.rowCount) - (other.getRows())) < RelOptUtil.EPSILON);
-    // Turn this one once we do the Algorithm selection in CBO
-    /*
-     * return (this == other) || (Math.abs((this.dCpu + this.dIo) -
-     * (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON);
-     */
+    return (this == other) || (Math.abs((this.cpu + this.io) -
+        (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON);
   }
 
   public RelOptCost minus(RelOptCost other) {
@@ -135,8 +123,8 @@
       return this;
     }
 
-    return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), this.io
-        - other.getIo());
+    return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(),
+        this.io - other.getIo());
   }
 
   public RelOptCost multiplyBy(double factor) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java	(.../https://svn.apache.org/repos/asf/hive/trunk)	(revision 1673613)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java	(working copy)
@@ -22,6 +22,7 @@
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.volcano.VolcanoPlanner;
 import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext;
 
 /**
  * Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive.
@@ -34,12 +35,12 @@
   private static final boolean ENABLE_COLLATION_TRAIT = true;
 
   /** Creates a HiveVolcanoPlanner.
*/ - public HiveVolcanoPlanner() { - super(HiveCost.FACTORY, null); + public HiveVolcanoPlanner(HiveConfigContext conf) { + super(HiveCost.FACTORY, conf); } - public static RelOptPlanner createPlanner() { - final VolcanoPlanner planner = new HiveVolcanoPlanner(); + public static RelOptPlanner createPlanner(HiveConfigContext conf) { + final VolcanoPlanner planner = new HiveVolcanoPlanner(conf); planner.addRelTraitDef(ConventionTraitDef.INSTANCE); if (ENABLE_COLLATION_TRAIT) { planner.addRelTraitDef(RelCollationTraitDef.INSTANCE); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java (working copy) @@ -18,26 +18,160 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.cost; import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import com.google.common.collect.ImmutableList; + // Use this once we have Join Algorithm selection public class HiveCostUtil { - private static final double cpuCostInNanoSec = 1.0; - private static final double netCostInNanoSec = 150 * cpuCostInNanoSec; - private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec; - private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec; - private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec; - @SuppressWarnings("unused") -//Use this once we have Join Algorithm selection - private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec; + private static final double CPU_COST = 1.0; + private static final double NET_COST = 150.0 * CPU_COST; + private static final double LOCAL_WRITE_COST = 4.0 * NET_COST; + private static final double LOCAL_READ_COST = 4.0 * NET_COST; + private static final double HDFS_WRITE_COST = 10.0 * LOCAL_WRITE_COST; + private static final double HDFS_READ_COST = 1.5 * LOCAL_READ_COST; + public static RelOptCost computCardinalityBasedCost(HiveRelNode hr) { return new HiveCost(hr.getRows(), 0, 0); } public static HiveCost computeCost(HiveTableScan t) { double cardinality = t.getRows(); - return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0); + return new HiveCost(cardinality, 0, HDFS_WRITE_COST * cardinality * 0); } + + public static double computeSortMergeCPUCost( + ImmutableList cardinalities, + ImmutableBitSet sorted) { + // Sort-merge join + double cpuCost = 0.0; + for (int i=0; i> relationInfos) { + // Sort-merge join + double ioCost = 0.0; + for (Pair relationInfo : relationInfos) { + ioCost += computeSortIOCost(relationInfo); + } + return ioCost; + } + + public static double computeSortIOCost(Pair relationInfo) { + // Sort-merge join + double ioCost = 0.0; + double cardinality = relationInfo.left; + double averageTupleSize = relationInfo.right; + // Write cost + ioCost += cardinality * averageTupleSize * LOCAL_WRITE_COST; + // Read cost + ioCost += cardinality * averageTupleSize * LOCAL_READ_COST; + // Net transfer cost + ioCost += cardinality * averageTupleSize * NET_COST; + return ioCost; + } + + public static double 
computeMapJoinCPUCost( + ImmutableList cardinalities, + ImmutableBitSet streaming) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i cardinalities, + ImmutableBitSet streaming) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i cardinalities) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i hiveNonPartitionCols; + private final ImmutableList hivePartitionCols; private final ImmutableMap hiveNonPartitionColsMap; private final ImmutableMap hivePartitionColsMap; - private final int noOfProjs; + private final ImmutableList hiveVirtualCols; + private final int noOfNonVirtualCols; final HiveConf hiveConf; private double rowCount = -1; @@ -67,37 +78,65 @@ PrunedPartitionList partitionList; Map partitionCache; AtomicInteger noColsMissingStats; + private final String qbID; protected static final Log LOG = LogFactory .getLog(RelOptHiveTable.class .getName()); - public RelOptHiveTable(RelOptSchema calciteSchema, String qualifiedTblName, String tblAlias, RelDataType rowType, - Table hiveTblMetadata, List hiveNonPartitionCols, - List hivePartitionCols, HiveConf hconf, Map partitionCache, AtomicInteger noColsMissingStats) { + public RelOptHiveTable(RelOptSchema calciteSchema, String qualifiedTblName, + RelDataType rowType, Table hiveTblMetadata, List hiveNonPartitionCols, + List hivePartitionCols, List hiveVirtualCols, HiveConf hconf, + Map partitionCache, AtomicInteger noColsMissingStats, + String qbID) { super(calciteSchema, qualifiedTblName, rowType); this.hiveTblMetadata = hiveTblMetadata; - this.tblAlias = tblAlias; this.hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols); - this.hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0); - this.hivePartitionColsMap = getColInfoMap(hivePartitionCols, hiveNonPartitionColsMap.size()); - this.noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size(); + this.hiveNonPartitionColsMap = HiveCalciteUtil.getColInfoMap(hiveNonPartitionCols, 0); + this.hivePartitionCols = ImmutableList.copyOf(hivePartitionCols); + this.hivePartitionColsMap = HiveCalciteUtil.getColInfoMap(hivePartitionCols, hiveNonPartitionColsMap.size()); + this.noOfNonVirtualCols = hiveNonPartitionCols.size() + hivePartitionCols.size(); + this.hiveVirtualCols = ImmutableList.copyOf(hiveVirtualCols); this.hiveConf = hconf; this.partitionCache = partitionCache; this.noColsMissingStats = noColsMissingStats; + this.qbID = qbID; } - private static ImmutableMap getColInfoMap(List hiveCols, - int startIndx) { - Builder bldr = ImmutableMap. builder(); + public RelOptHiveTable copy(RelDataType newRowType) { + // 1. Build map of column name to col index of original schema + // Assumption: Hive Table can not contain duplicate column names + Map nameToColIndxMap = new HashMap(); + for (RelDataTypeField f : this.rowType.getFieldList()) { + nameToColIndxMap.put(f.getName(), f.getIndex()); + } - int indx = startIndx; - for (ColumnInfo ci : hiveCols) { - bldr.put(indx, ci); - indx++; + // 2. 
Build nonPart/Part/Virtual column info for new RowSchema + List newHiveNonPartitionCols = new ArrayList(); + List newHivePartitionCols = new ArrayList(); + List newHiveVirtualCols = new ArrayList(); + Map virtualColInfoMap = HiveCalciteUtil.getVColsMap(this.hiveVirtualCols, + this.noOfNonVirtualCols); + Integer originalColIndx; + ColumnInfo cInfo; + VirtualColumn vc; + for (RelDataTypeField f : newRowType.getFieldList()) { + originalColIndx = nameToColIndxMap.get(f.getName()); + if ((cInfo = hiveNonPartitionColsMap.get(originalColIndx)) != null) { + newHiveNonPartitionCols.add(new ColumnInfo(cInfo)); + } else if ((cInfo = hivePartitionColsMap.get(originalColIndx)) != null) { + newHivePartitionCols.add(new ColumnInfo(cInfo)); + } else if ((vc = virtualColInfoMap.get(originalColIndx)) != null) { + newHiveVirtualCols.add(vc); + } else { + throw new RuntimeException("Copy encountered a column not seen in original TS"); } + } - return bldr.build(); + // 3. Build new Table + return new RelOptHiveTable(this.schema, this.name, newRowType, + this.hiveTblMetadata, newHiveNonPartitionCols, newHivePartitionCols, newHiveVirtualCols, + this.hiveConf, this.partitionCache, this.noColsMissingStats, qbID); } @Override @@ -116,16 +155,57 @@ } @Override + public List getCollationList() { + ImmutableList.Builder collationList = new ImmutableList.Builder(); + for (Order sortColumn : this.hiveTblMetadata.getSortCols()) { + for (int i=0; i() + .add(RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(collationList.build()))) + .build(); + } + + @Override + public RelDistribution getDistribution() { + ImmutableList.Builder columnPositions = new ImmutableList.Builder(); + for (String bucketColumn : this.hiveTblMetadata.getBucketCols()) { + for (int i=0; i rowCounts = StatsUtils.getBasicStatForPartitions( - hiveTblMetadata, partitionList.getNotDeniedPartns(), - StatsSetupConst.ROW_COUNT); + List rowCounts = StatsUtils.getBasicStatForPartitions(hiveTblMetadata, + partitionList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT); rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts); } else { @@ -143,19 +223,6 @@ return hiveTblMetadata; } - public String getTableAlias() { - // NOTE: Calcite considers tbls to be equal if their names are the same. Hence - // we need to provide Calcite the fully qualified table name (dbname.tblname) - // and not the user provided aliases. - // However in HIVE DB name can not appear in select list; in case of join - // where table names differ only in DB name, Hive would require user - // introducing explicit aliases for tbl. - if (tblAlias == null) - return hiveTblMetadata.getTableName(); - else - return tblAlias; - } - private String getColNamesForLogging(Set colLst) { StringBuffer sb = new StringBuffer(); boolean firstEntry = true; @@ -173,22 +240,27 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode) { try { - if (!hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) { - // there is no predicate on partitioning column, we need all partitions in this case. - partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(), partitionCache); + if (!hiveTblMetadata.isPartitioned() || pruneNode == null + || InputFinder.bits(pruneNode).length() == 0) { + // there is no predicate on partitioning column, we need all partitions + // in this case. 
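+        // e.g. for a table partitioned by (ds), a filter like "key > 10"
+        // references no partition column, so every partition is retained,
+        // whereas "ds = '2015-04-01'" would take the pruning path below
+        // (example is illustrative; column names are placeholders).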
+ partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(), + partitionCache); return; } // We have valid pruning expressions, only retrieve qualifying partitions - ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true, getRelOptSchema().getTypeFactory())); + ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), + true, this.getRelOptSchema().getTypeFactory())); - partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), partitionCache); + partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), + partitionCache); } catch (HiveException he) { throw new RuntimeException(he); } } - private void updateColStats(Set projIndxLst) { + private void updateColStats(Set projIndxLst, boolean allowNullColumnForMissingStats) { List nonPartColNamesThatRqrStats = new ArrayList(); List nonPartColIndxsThatRqrStats = new ArrayList(); List partColNamesThatRqrStats = new ArrayList(); @@ -289,10 +361,10 @@ if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) { ColStatistics cStats = null; for (int i = 0; i < partColNamesThatRqrStats.size(); i++) { - cStats = new ColStatistics(hiveTblMetadata.getTableName(), - partColNamesThatRqrStats.get(i), hivePartitionColsMap.get( - partColIndxsThatRqrStats.get(i)).getTypeName()); - cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(),partColNamesThatRqrStats.get(i))); + cStats = new ColStatistics(hiveTblMetadata.getTableName(), partColNamesThatRqrStats.get(i), + hivePartitionColsMap.get(partColIndxsThatRqrStats.get(i)).getTypeName()); + cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(), + partColNamesThatRqrStats.get(i))); hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats); } } @@ -301,11 +373,15 @@ if (!colNamesFailedStats.isEmpty()) { String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: " + getColNamesForLogging(colNamesFailedStats); + noColsMissingStats.getAndAdd(colNamesFailedStats.size()); + if (allowNullColumnForMissingStats) { + LOG.warn(logMsg); + } else { LOG.error(logMsg); - noColsMissingStats.getAndAdd(colNamesFailedStats.size()); throw new RuntimeException(logMsg); } } + } private int getDistinctCount(Set partitions, String partColName) { Set distinctVals = new HashSet(partitions.size()); @@ -316,32 +392,34 @@ } public List getColStat(List projIndxLst) { - ImmutableList.Builder colStatsBldr = ImmutableList. builder(); + return getColStat(projIndxLst, false); + } + public List getColStat(List projIndxLst, boolean allowNullColumnForMissingStats) { + List colStatsBldr = Lists.newArrayList(); + if (projIndxLst != null) { - updateColStats(new HashSet(projIndxLst)); + updateColStats(new HashSet(projIndxLst), allowNullColumnForMissingStats); for (Integer i : projIndxLst) { colStatsBldr.add(hiveColStatsMap.get(i)); } } else { List pILst = new ArrayList(); - for (Integer i = 0; i < noOfProjs; i++) { + for (Integer i = 0; i < noOfNonVirtualCols; i++) { pILst.add(i); } - updateColStats(new HashSet(pILst)); + updateColStats(new HashSet(pILst), allowNullColumnForMissingStats); for (Integer pi : pILst) { colStatsBldr.add(hiveColStatsMap.get(pi)); } } - return colStatsBldr.build(); + return colStatsBldr; } /* - * use to check if a set of columns are all partition columns. - * true only if: - * - all columns in BitSet are partition - * columns. + * use to check if a set of columns are all partition columns. 
true only if: - + * all columns in BitSet are partition columns. */ public boolean containsPartitionColumnsOnly(ImmutableBitSet cols) { @@ -352,4 +430,32 @@ } return true; } + + public List getVirtualCols() { + return this.hiveVirtualCols; } + + public List getPartColumns() { + return this.hivePartitionCols; + } + + public List getNonPartColumns() { + return this.hiveNonPartitionCols; + } + + public String getQBID() { + return qbID; + } + + public int getNoOfNonVirtualCols() { + return noOfNonVirtualCols; + } + + public Map getPartColInfoMap() { + return hivePartitionColsMap; + } + + public Map getNonPartColInfoMap() { + return hiveNonPartitionColsMap; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java (revision 1673601) @@ -0,0 +1,892 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistribution.Type; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.SortExchange; +import org.apache.calcite.rel.logical.LogicalExchange; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.apache.hadoop.hive.ql.parse.JoinCond; +import org.apache.hadoop.hive.ql.parse.JoinType; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; +import org.apache.hadoop.hive.ql.parse.PTFTranslator; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.UnparseTranslator; +import org.apache.hadoop.hive.ql.parse.WindowingComponentizer; +import org.apache.hadoop.hive.ql.parse.WindowingSpec; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import 
org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PTFDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.plan.UnionDesc; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +public class HiveOpConverter { + + private static final Log LOG = LogFactory.getLog(HiveOpConverter.class); + + public static enum HIVEAGGOPMODE { + NO_SKEW_NO_MAP_SIDE_AGG, // Corresponds to SemAnalyzer genGroupByPlan1MR + SKEW_NO_MAP_SIDE_AGG, // Corresponds to SemAnalyzer genGroupByPlan2MR + NO_SKEW_MAP_SIDE_AGG, // Corresponds to SemAnalyzer + // genGroupByPlanMapAggrNoSkew + SKEW_MAP_SIDE_AGG // Corresponds to SemAnalyzer genGroupByPlanMapAggr2MR + }; + + // TODO: remove this after stashing only rqd pieces from opconverter + private final SemanticAnalyzer semanticAnalyzer; + private final HiveConf hiveConf; + private final UnparseTranslator unparseTranslator; + private final Map> topOps; + private final boolean strictMode; + private int reduceSinkTagGenerator; + + public HiveOpConverter(SemanticAnalyzer semanticAnalyzer, HiveConf hiveConf, + UnparseTranslator unparseTranslator, Map> topOps, + boolean strictMode) { + this.semanticAnalyzer = semanticAnalyzer; + this.hiveConf = hiveConf; + this.unparseTranslator = unparseTranslator; + this.topOps = topOps; + this.strictMode = strictMode; + this.reduceSinkTagGenerator = 0; + } + + static class OpAttr { + final String tabAlias; + ImmutableList inputs; + ImmutableMap vcolMap; + + OpAttr(String tabAlias, Map vcolMap, Operator... inputs) { + this.tabAlias = tabAlias; + this.vcolMap = ImmutableMap.copyOf(vcolMap); + this.inputs = ImmutableList.copyOf(inputs); + } + + private OpAttr clone(Operator... inputs) { + return new OpAttr(tabAlias, this.vcolMap, inputs); + } + } + + public Operator convert(RelNode root) throws SemanticException { + OpAttr opAf = dispatch(root); + return opAf.inputs.get(0); + } + + OpAttr dispatch(RelNode rn) throws SemanticException { + if (rn instanceof HiveTableScan) { + return visit((HiveTableScan) rn); + } else if (rn instanceof HiveProject) { + return visit((HiveProject) rn); + } else if (rn instanceof HiveJoin) { + return visit((HiveJoin) rn); + } else if (rn instanceof SemiJoin) { + SemiJoin sj = (SemiJoin) rn; + HiveJoin hj = HiveJoin.getJoin(sj.getCluster(), sj.getLeft(), sj.getRight(), + sj.getCondition(), sj.getJoinType(), true); + return visit(hj); + } else if (rn instanceof HiveFilter) { + return visit((HiveFilter) rn); + } else if (rn instanceof HiveSort) { + return visit((HiveSort) rn); + } else if (rn instanceof HiveUnion) { + return visit((HiveUnion) rn); + } else if (rn instanceof SortExchange) { + return visit((SortExchange) rn); + } else if (rn instanceof HiveAggregate) { + return visit((HiveAggregate) rn); + } + LOG.error(rn.getClass().getCanonicalName() + "operator translation not supported" + + " yet in return path."); + return null; + } + + /** + * TODO: 1. 
PPD needs to get pushed in to TS + * + * @param scanRel + * @return + */ + OpAttr visit(HiveTableScan scanRel) { + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + + " with row type: [" + scanRel.getRowType() + "]"); + } + + RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable(); + + // 1. Setup TableScan Desc + // 1.1 Build col details used by scan + ArrayList colInfos = new ArrayList(); + List virtualCols = new ArrayList(ht.getVirtualCols()); + Map hiveScanVColMap = new HashMap(); + List partColNames = new ArrayList(); + List neededColumnIDs = new ArrayList(); + List neededColumns = new ArrayList(); + + Map posToVColMap = HiveCalciteUtil.getVColsMap(virtualCols, + ht.getNoOfNonVirtualCols()); + Map posToPartColInfo = ht.getPartColInfoMap(); + Map posToNonPartColInfo = ht.getNonPartColInfoMap(); + List neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT(); + List scanColNames = scanRel.getRowType().getFieldNames(); + String tableAlias = scanRel.getTableAlias(); + + String colName; + ColumnInfo colInfo; + VirtualColumn vc; + Integer posInRHT; + + for (int i = 0; i < neededColIndxsFrmReloptHT.size(); i++) { + colName = scanColNames.get(i); + posInRHT = neededColIndxsFrmReloptHT.get(i); + if (posToVColMap.containsKey(posInRHT)) { + vc = posToVColMap.get(posInRHT); + virtualCols.add(vc); + colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden()); + hiveScanVColMap.put(i, vc); + } else if (posToPartColInfo.containsKey(posInRHT)) { + partColNames.add(colName); + colInfo = posToPartColInfo.get(posInRHT); + } else { + colInfo = posToNonPartColInfo.get(posInRHT); + } + neededColumnIDs.add(posInRHT); + neededColumns.add(colName); + colInfos.add(colInfo); + } + + // 1.2 Create TableScanDesc + TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD()); + + // 1.3. Set Partition cols in TSDesc + tsd.setPartColumns(partColNames); + + // 1.4. Set needed cols in TSDesc + tsd.setNeededColumnIDs(neededColumnIDs); + tsd.setNeededColumns(neededColumns); + + // 2. 
Setup TableScan + TableScanOperator ts = (TableScanOperator) OperatorFactory.get(tsd, new RowSchema(colInfos)); + + topOps.put(ht.getQBID(), ts); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]"); + } + + return new OpAttr(tableAlias, hiveScanVColMap, ts); + } + + OpAttr visit(HiveProject projectRel) throws SemanticException { + OpAttr inputOpAf = dispatch(projectRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + projectRel.getId() + ":" + + projectRel.getRelTypeName() + " with row type: [" + projectRel.getRowType() + "]"); + } + + WindowingSpec windowingSpec = new WindowingSpec(); + List exprCols = new ArrayList(); + for (int pos = 0; pos < projectRel.getChildExps().size(); pos++) { + ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias, projectRel + .getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(), + projectRel.getRowType(), false, projectRel.getCluster().getTypeFactory()); + exprCols.add(projectRel.getChildExps().get(pos).accept(converter)); + if (converter.getWindowFunctionSpec() != null) { + windowingSpec.addWindowFunction(converter.getWindowFunctionSpec()); + } + } + if (windowingSpec.getWindowExpressions() != null + && !windowingSpec.getWindowExpressions().isEmpty()) { + inputOpAf = genPTF(inputOpAf, windowingSpec); + } + // TODO: is this a safe assumption (name collision, external names...) + List exprNames = new ArrayList(projectRel.getRowType().getFieldNames()); + SelectDesc sd = new SelectDesc(exprCols, exprNames); + Pair, Map> colInfoVColPair = createColInfos( + projectRel.getChildExps(), exprCols, exprNames, inputOpAf); + SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(sd, new RowSchema( + colInfoVColPair.getKey()), inputOpAf.inputs.get(0)); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + selOp + " with row schema: [" + selOp.getSchema() + "]"); + } + + return new OpAttr(inputOpAf.tabAlias, colInfoVColPair.getValue(), selOp); + } + + OpAttr visit(HiveJoin joinRel) throws SemanticException { + // 1. Convert inputs + OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()]; + List> children = new ArrayList>(joinRel.getInputs().size()); + for (int i = 0; i < inputs.length; i++) { + inputs[i] = dispatch(joinRel.getInput(i)); + children.add(inputs[i].inputs.get(0)); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + + " with row type: [" + joinRel.getRowType() + "]"); + } + + // 2. Convert join condition + JoinPredicateInfo joinPredInfo = JoinPredicateInfo.constructJoinPredicateInfo(joinRel); + + // 3. Extract join keys from condition + ExprNodeDesc[][] joinKeys = extractJoinKeys(joinPredInfo, joinRel.getInputs()); + + // 4. Generate Join operator + JoinOperator joinOp = genJoin(joinRel, joinPredInfo, children, joinKeys); + + // 5. TODO: Extract condition for non-equi join elements (if any) and + // add it + + // 6. Virtual columns + Map vcolMap = new HashMap(); + vcolMap.putAll(inputs[0].vcolMap); + if (extractJoinType(joinRel) != JoinType.LEFTSEMI) { + int shift = inputs[0].inputs.get(0).getSchema().getSignature().size(); + for (int i = 1; i < inputs.length; i++) { + vcolMap.putAll(HiveCalciteUtil.shiftVColsMap(inputs[i].vcolMap, shift)); + shift += inputs[i].inputs.get(0).getSchema().getSignature().size(); + } + } + + // 8. 
Return result + return new OpAttr(null, vcolMap, joinOp); + } + + OpAttr visit(HiveAggregate aggRel) throws SemanticException { + OpAttr inputOpAf = dispatch(aggRel.getInput()); + return HiveGBOpConvUtil.translateGB(inputOpAf, aggRel, hiveConf); + } + + OpAttr visit(HiveSort sortRel) throws SemanticException { + OpAttr inputOpAf = dispatch(sortRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " with row type: [" + sortRel.getRowType() + "]"); + if (sortRel.getCollation() == RelCollations.EMPTY) { + LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " consists of limit"); + } else if (sortRel.fetch == null) { + LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " consists of sort"); + } else { + LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " consists of sort+limit"); + } + } + + Operator inputOp = inputOpAf.inputs.get(0); + Operator resultOp = inputOpAf.inputs.get(0); + // 1. If we need to sort tuples based on the value of some + // of their columns + if (sortRel.getCollation() != RelCollations.EMPTY) { + + // In strict mode, in the presence of order by, limit must be + // specified + if (strictMode && sortRel.fetch == null) { + throw new SemanticException(ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg()); + } + + // 1.a. Extract order for each column from collation + // Generate sortCols and order + List sortCols = new ArrayList(); + StringBuilder order = new StringBuilder(); + for (RelCollation collation : sortRel.getCollationList()) { + for (RelFieldCollation sortInfo : collation.getFieldCollations()) { + int sortColumnPos = sortInfo.getFieldIndex(); + ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature() + .get(sortColumnPos)); + ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(), + columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol()); + sortCols.add(sortColumn); + if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) { + order.append("-"); + } else { + order.append("+"); + } + } + } + // Use only 1 reducer for order by + int numReducers = 1; + + // 1.b. Generate reduce sink and project operator + resultOp = genReduceSinkAndBacktrackSelect(resultOp, + sortCols.toArray(new ExprNodeDesc[sortCols.size()]), -1, new ArrayList(), + order.toString(), numReducers, Operation.NOT_ACID, strictMode); + } + + // 2. If we need to generate limit + if (sortRel.fetch != null) { + int limit = RexLiteral.intValue(sortRel.fetch); + LimitDesc limitDesc = new LimitDesc(limit); + // TODO: Set 'last limit' global property + ArrayList cinfoLst = createColInfos(inputOp); + resultOp = OperatorFactory.getAndMakeChild(limitDesc, + new RowSchema(cinfoLst), resultOp); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]"); + } + } + + // 3. 
Return result + return inputOpAf.clone(resultOp); + } + + /** + * TODO: 1) isSamplingPred 2) sampleDesc 3) isSortedFilter + */ + OpAttr visit(HiveFilter filterRel) throws SemanticException { + OpAttr inputOpAf = dispatch(filterRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + filterRel.getId() + ":" + filterRel.getRelTypeName() + + " with row type: [" + filterRel.getRowType() + "]"); + } + + ExprNodeDesc filCondExpr = filterRel.getCondition().accept( + new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(), false, + filterRel.getCluster().getTypeFactory())); + FilterDesc filDesc = new FilterDesc(filCondExpr, false); + ArrayList cinfoLst = createColInfos(inputOpAf.inputs.get(0)); + FilterOperator filOp = (FilterOperator) OperatorFactory.getAndMakeChild(filDesc, new RowSchema( + cinfoLst), inputOpAf.inputs.get(0)); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + filOp + " with row schema: [" + filOp.getSchema() + "]"); + } + + return inputOpAf.clone(filOp); + } + + OpAttr visit(HiveUnion unionRel) throws SemanticException { + // 1. Convert inputs + OpAttr[] inputs = new OpAttr[unionRel.getInputs().size()]; + for (int i = 0; i < inputs.length; i++) { + inputs[i] = dispatch(unionRel.getInput(i)); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName() + + " with row type: [" + unionRel.getRowType() + "]"); + } + + // 2. Create a new union operator + UnionDesc unionDesc = new UnionDesc(); + unionDesc.setNumInputs(inputs.length); + ArrayList cinfoLst = createColInfos(inputs[0].inputs.get(0)); + Operator[] children = new Operator[inputs.length]; + for (int i = 0; i < children.length; i++) { + children[i] = inputs[i].inputs.get(0); + } + Operator unionOp = OperatorFactory.getAndMakeChild(unionDesc, + new RowSchema(cinfoLst), children); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]"); + } + + // 3. 
Return result + return inputs[0].clone(unionOp); + } + + OpAttr visit(SortExchange exchangeRel) throws SemanticException { + OpAttr inputOpAf = dispatch(exchangeRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]"); + } + + RelDistribution distribution = exchangeRel.getDistribution(); + if (distribution.getType() != Type.HASH_DISTRIBUTED) { + throw new SemanticException("Only hash distribution supported for LogicalExchange"); + } + ExprNodeDesc[] expressions = new ExprNodeDesc[distribution.getKeys().size()]; + for (int i = 0; i < distribution.getKeys().size(); i++) { + int key = distribution.getKeys().get(i); + ColumnInfo colInfo = inputOpAf.inputs.get(0).getSchema().getSignature().get(key); + ExprNodeDesc column = new ExprNodeColumnDesc(colInfo); + expressions[i] = column; + } + + ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), expressions, + reduceSinkTagGenerator++, -1, Operation.NOT_ACID, strictMode); + + return inputOpAf.clone(rsOp); + } + + private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticException { + Operator input = inputOpAf.inputs.get(0); + + wSpec.validateAndMakeEffective(); + WindowingComponentizer groups = new WindowingComponentizer(wSpec); + RowResolver rr = new RowResolver(); + for (ColumnInfo ci : input.getSchema().getSignature()) { + rr.put(ci.getTabAlias(), ci.getInternalName(), ci); + } + + while (groups.hasNext()) { + wSpec = groups.next(hiveConf, semanticAnalyzer, unparseTranslator, rr); + + // 1. Create RS and backtrack Select operator on top + ArrayList keyCols = new ArrayList(); + ArrayList partCols = new ArrayList(); + StringBuilder order = new StringBuilder(); + + for (PartitionExpression partCol : wSpec.getQueryPartitionSpec().getExpressions()) { + ExprNodeDesc partExpr = semanticAnalyzer.genExprNodeDesc(partCol.getExpression(), rr); + if (ExprNodeDescUtils.indexOf(partExpr, partCols) < 0) { + keyCols.add(partExpr); + partCols.add(partExpr); + order.append('+'); + } + } + + if (wSpec.getQueryOrderSpec() != null) { + for (OrderExpression orderCol : wSpec.getQueryOrderSpec().getExpressions()) { + ExprNodeDesc orderExpr = semanticAnalyzer.genExprNodeDesc(orderCol.getExpression(), rr); + char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-'; + int index = ExprNodeDescUtils.indexOf(orderExpr, keyCols); + if (index >= 0) { + order.setCharAt(index, orderChar); + continue; + } + keyCols.add(orderExpr); + order.append(orderChar); + } + } + + SelectOperator selectOp = genReduceSinkAndBacktrackSelect(input, + keyCols.toArray(new ExprNodeDesc[keyCols.size()]), reduceSinkTagGenerator++, partCols, + order.toString(), -1, Operation.NOT_ACID, strictMode); + + // 2. Finally create PTF + PTFTranslator translator = new PTFTranslator(); + PTFDesc ptfDesc = translator.translate(wSpec, semanticAnalyzer, hiveConf, rr, + unparseTranslator); + RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr(); + + Operator ptfOp = OperatorFactory.getAndMakeChild(ptfDesc, + new RowSchema(ptfOpRR.getColumnInfos()), selectOp); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + ptfOp + " with row schema: [" + ptfOp.getSchema() + "]"); + } + + // 3. 
Prepare for next iteration (if any) + rr = ptfOpRR; + input = ptfOp; + } + + return inputOpAf.clone(input); + } + + private ExprNodeDesc[][] extractJoinKeys(JoinPredicateInfo joinPredInfo, List inputs) { + ExprNodeDesc[][] joinKeys = new ExprNodeDesc[inputs.size()][]; + for (int i = 0; i < inputs.size(); i++) { + joinKeys[i] = new ExprNodeDesc[joinPredInfo.getEquiJoinPredicateElements().size()]; + for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(j); + RexNode key = joinLeafPredInfo.getJoinKeyExprs(j).get(0); + joinKeys[i][j] = convertToExprNode(key, inputs.get(j), null); + } + } + return joinKeys; + } + + private static SelectOperator genReduceSinkAndBacktrackSelect(Operator input, + ExprNodeDesc[] keys, int tag, ArrayList partitionCols, String order, + int numReducers, Operation acidOperation, boolean strictMode) throws SemanticException { + // 1. Generate RS operator + ReduceSinkOperator rsOp = genReduceSink(input, keys, tag, partitionCols, order, numReducers, + acidOperation, strictMode); + + // 2. Generate backtrack Select operator + Map descriptors = buildBacktrackFromReduceSink(rsOp, + input); + SelectDesc selectDesc = new SelectDesc(new ArrayList(descriptors.values()), + new ArrayList(descriptors.keySet())); + ArrayList cinfoLst = createColInfos(input); + SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, + new RowSchema(cinfoLst), rsOp); + selectOp.setColumnExprMap(descriptors); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]"); + } + + return selectOp; + } + + private static ReduceSinkOperator genReduceSink(Operator input, ExprNodeDesc[] keys, int tag, + int numReducers, Operation acidOperation, boolean strictMode) throws SemanticException { + return genReduceSink(input, keys, tag, new ArrayList(), "", numReducers, + acidOperation, strictMode); + } + + @SuppressWarnings({ "rawtypes", "unchecked" }) + private static ReduceSinkOperator genReduceSink(Operator input, ExprNodeDesc[] keys, int tag, + ArrayList partitionCols, String order, int numReducers, + Operation acidOperation, boolean strictMode) throws SemanticException { + Operator dummy = Operator.createDummy(); // dummy for backtracking + dummy.setParentOperators(Arrays.asList(input)); + + ArrayList reduceKeys = new ArrayList(); + ArrayList reduceKeysBack = new ArrayList(); + + // Compute join keys and store in reduceKeys + for (ExprNodeDesc key : keys) { + reduceKeys.add(key); + reduceKeysBack.add(ExprNodeDescUtils.backtrack(key, dummy, input)); + } + + // Walk over the input schema and copy in the output + ArrayList reduceValues = new ArrayList(); + ArrayList reduceValuesBack = new ArrayList(); + Map colExprMap = new HashMap(); + + List inputColumns = input.getSchema().getSignature(); + ArrayList outputColumns = new ArrayList(); + List outputColumnNames = new ArrayList(); + int[] index = new int[inputColumns.size()]; + for (int i = 0; i < inputColumns.size(); i++) { + ColumnInfo colInfo = inputColumns.get(i); + String outputColName = colInfo.getInternalName(); + ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo); + + // backtrack can be null when input is script operator + ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, input); + int kindex = exprBack == null ? 
-1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
+      if (kindex >= 0) {
+        ColumnInfo newColInfo = new ColumnInfo(colInfo);
+        newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
+        newColInfo.setAlias(outputColName);
+        newColInfo.setTabAlias(colInfo.getTabAlias());
+        outputColumns.add(newColInfo);
+        index[i] = kindex;
+        continue;
+      }
+      int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
+      if (vindex >= 0) {
+        index[i] = -vindex - 1;
+        continue;
+      }
+      index[i] = -reduceValues.size() - 1;
+
+      reduceValues.add(expr);
+      reduceValuesBack.add(exprBack);
+
+      ColumnInfo newColInfo = new ColumnInfo(colInfo);
+      newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
+      newColInfo.setAlias(outputColName);
+      newColInfo.setTabAlias(colInfo.getTabAlias());
+
+      outputColumns.add(newColInfo);
+      outputColumnNames.add(outputColName);
+    }
+    dummy.setParentOperators(null);
+
+    // Use only 1 reducer if no reduce keys
+    if (reduceKeys.size() == 0) {
+      numReducers = 1;
+
+      // Cartesian product is not supported in strict mode
+      if (strictMode) {
+        throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg());
+      }
+    }
+
+    ReduceSinkDesc rsDesc;
+    if (order.isEmpty()) {
+      rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag,
+          reduceKeys.size(), numReducers, acidOperation);
+    } else {
+      rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag,
+          partitionCols, order, numReducers, acidOperation);
+    }
+
+    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc,
+        new RowSchema(outputColumns), input);
+
+    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
+    for (int i = 0; i < keyColNames.size(); i++) {
+      colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
+    }
+    List<String> valColNames = rsDesc.getOutputValueColumnNames();
+    for (int i = 0; i < valColNames.size(); i++) {
+      colExprMap.put(Utilities.ReduceField.VALUE + "."
+ valColNames.get(i), reduceValues.get(i)); + } + + rsOp.setValueIndex(index); + rsOp.setColumnExprMap(colExprMap); + rsOp.setInputAliases(input.getSchema().getColumnNames() + .toArray(new String[input.getSchema().getColumnNames().size()])); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + rsOp + " with row schema: [" + rsOp.getSchema() + "]"); + } + + return rsOp; + } + + private static JoinOperator genJoin(HiveJoin hiveJoin, JoinPredicateInfo joinPredInfo, + List> children, ExprNodeDesc[][] joinKeys) throws SemanticException { + + // Extract join type + JoinType joinType = extractJoinType(hiveJoin); + + // NOTE: Currently binary joins only + JoinCondDesc[] joinCondns = new JoinCondDesc[1]; + joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType)); + + ArrayList outputColumns = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(hiveJoin.getRowType() + .getFieldNames()); + Operator[] childOps = new Operator[children.size()]; + + Map reversedExprs = new HashMap(); + HashMap> exprMap = new HashMap>(); + Map colExprMap = new HashMap(); + HashMap> posToAliasMap = new HashMap>(); + + int outputPos = 0; + for (int pos = 0; pos < children.size(); pos++) { + ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos); + if (inputRS.getNumParent() != 1) { + throw new SemanticException("RS should have single parent"); + } + Operator parent = inputRS.getParentOperators().get(0); + ReduceSinkDesc rsDesc = inputRS.getConf(); + + int[] index = inputRS.getValueIndex(); + + Byte tag = (byte) rsDesc.getTag(); + + // Semijoin + if (joinType == JoinType.LEFTSEMI && pos != 0) { + exprMap.put(tag, new ArrayList()); + childOps[pos] = inputRS; + continue; + } + + List keyColNames = rsDesc.getOutputKeyColumnNames(); + List valColNames = rsDesc.getOutputValueColumnNames(); + + posToAliasMap.put(pos, new HashSet(inputRS.getSchema().getTableNames())); + + Map descriptors = buildBacktrackFromReduceSink(outputPos, + outputColumnNames, keyColNames, valColNames, index, parent); + + List parentColumns = parent.getSchema().getSignature(); + for (int i = 0; i < index.length; i++) { + ColumnInfo info = new ColumnInfo(parentColumns.get(i)); + info.setInternalName(outputColumnNames.get(outputPos)); + outputColumns.add(info); + reversedExprs.put(outputColumnNames.get(outputPos), tag); + outputPos++; + } + + exprMap.put(tag, new ArrayList(descriptors.values())); + colExprMap.putAll(descriptors); + childOps[pos] = inputRS; + } + + boolean noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER + && joinType != JoinType.RIGHTOUTER; + JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, joinKeys); + desc.setReversedExprs(reversedExprs); + + JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc, new RowSchema( + outputColumns), childOps); + joinOp.setColumnExprMap(colExprMap); + joinOp.setPosToAliasMap(posToAliasMap); + + // TODO: null safes? 
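+    // A null-safe equality condition (e.g. "a.key <=> b.key") would also
+    // need the nullsafes flags propagated into JoinDesc, e.g. via
+    // desc.setNullSafes(...); sketch only, covered by the TODO above.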
+ + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]"); + } + + return joinOp; + } + + private static JoinType extractJoinType(HiveJoin join) { + // UNIQUE + if (join.isDistinct()) { + return JoinType.UNIQUE; + } + // SEMIJOIN + if (join.isLeftSemiJoin()) { + return JoinType.LEFTSEMI; + } + // OUTER AND INNER JOINS + JoinType resultJoinType; + switch (join.getJoinType()) { + case FULL: + resultJoinType = JoinType.FULLOUTER; + break; + case LEFT: + resultJoinType = JoinType.LEFTOUTER; + break; + case RIGHT: + resultJoinType = JoinType.RIGHTOUTER; + break; + default: + resultJoinType = JoinType.INNER; + break; + } + return resultJoinType; + } + + private static Map buildBacktrackFromReduceSink(ReduceSinkOperator rsOp, + Operator inputOp) { + return buildBacktrackFromReduceSink(0, inputOp.getSchema().getColumnNames(), rsOp.getConf() + .getOutputKeyColumnNames(), rsOp.getConf().getOutputValueColumnNames(), + rsOp.getValueIndex(), inputOp); + } + + private static Map buildBacktrackFromReduceSink(int initialPos, + List outputColumnNames, List keyColNames, List valueColNames, + int[] index, Operator inputOp) { + Map columnDescriptors = new LinkedHashMap(); + for (int i = 0; i < index.length; i++) { + ColumnInfo info = new ColumnInfo(inputOp.getSchema().getSignature().get(i)); + String field; + if (index[i] >= 0) { + field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]); + } else { + field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1); + } + ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), + info.getIsVirtualCol()); + columnDescriptors.put(outputColumnNames.get(initialPos + i), desc); + } + return columnDescriptors; + } + + private static ExprNodeDesc convertToExprNode(RexNode rn, RelNode inputRel, String tabAlias) { + return rn.accept(new ExprNodeConverter(tabAlias, inputRel.getRowType(), false, + inputRel.getCluster().getTypeFactory())); + } + + private static ArrayList createColInfos(Operator input) { + ArrayList cInfoLst = new ArrayList(); + for (ColumnInfo ci : input.getSchema().getSignature()) { + cInfoLst.add(new ColumnInfo(ci)); + } + return cInfoLst; + } + + private static Pair<List<ColumnInfo>, Map<Integer, VirtualColumn>> createColInfos( + List calciteExprs, List hiveExprs, List projNames, + OpAttr inpOpAf) { + if (hiveExprs.size() != projNames.size()) { + throw new RuntimeException("Column expressions list doesn't match Column Names list"); + } + + RexNode rexN; + ExprNodeDesc pe; + ArrayList colInfos = new ArrayList(); + VirtualColumn vc; + Map newVColMap = new HashMap(); + for (int i = 0; i < hiveExprs.size(); i++) { + pe = hiveExprs.get(i); + rexN = calciteExprs.get(i); + vc = null; + if (rexN instanceof RexInputRef) { + vc = inpOpAf.vcolMap.get(((RexInputRef) rexN).getIndex()); + if (vc != null) { + newVColMap.put(i, vc); + } + } + colInfos + .add(new ColumnInfo(projNames.get(i), pe.getTypeInfo(), inpOpAf.tabAlias, vc != null)); + } + + return new Pair<List<ColumnInfo>, Map<Integer, VirtualColumn>>(colInfos, newVColMap); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java (revision 1673601) @@ -0,0 +1,1237 @@ +/** + * Licensed to the Apache Software
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import com.google.common.collect.ImmutableList; + +/** + * TODO:
+ * 1. Change the output col/ExprNodeColumn names to external names.
+ * 2. Verify if we need to use the "KEY."/"VALUE." in RS cols; switch to + * external names if possible.
* 3. In ExprNode & in ColumnInfo the tableAlias/VirtualColumn is specified + * differently for different GB/RS in pipeline. Remove the different treatments. + * 4. VirtualColMap needs to be maintained + * + */ +public class HiveGBOpConvUtil { + private static enum HIVEGBPHYSICALMODE { + MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB, MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB, MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT, MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT, NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW + }; + + private static class UDAFAttrs { + private boolean isDistinctUDAF; + private String udafName; + private GenericUDAFEvaluator udafEvaluator; + private ArrayList udafParams = new ArrayList(); + private List udafParamsIndxInGBInfoDistExprs = new ArrayList(); + }; + + private static class GBInfo { + private List outputColNames = new ArrayList(); + + private List gbKeyColNamesInInput = new ArrayList(); + private List gbKeyTypes = new ArrayList(); + private List gbKeys = new ArrayList(); + + private List grpSets = new ArrayList(); + private boolean grpSetRqrAdditionalMRJob; + private boolean grpIdFunctionNeeded; + + private List distExprNames = new ArrayList(); + private List distExprTypes = new ArrayList(); + private List distExprNodes = new ArrayList(); + private List<List<Integer>> distColIndices = new ArrayList<List<Integer>>(); + + private List deDupedNonDistIrefs = new ArrayList(); + + private List udafAttrs = new ArrayList(); + private boolean containsDistinctAggr = false; + + float groupByMemoryUsage; + float memoryThreshold; + + private HIVEGBPHYSICALMODE gbPhysicalPipelineMode; + }; + + private static HIVEGBPHYSICALMODE getAggOPMode(HiveConf hc, GBInfo gbInfo) { + HIVEGBPHYSICALMODE gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB; + + if (hc.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) { + if (!hc.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { + if (!gbInfo.grpSetRqrAdditionalMRJob) { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB; + } else { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB; + } + } else { + if (gbInfo.containsDistinctAggr || !gbInfo.gbKeys.isEmpty()) { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT; + } else { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT; + } + } + } else { + if (!hc.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW; + } else { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_SKEW; + } + } + + return gbPhysicalPipelineMode; + } + + // For each of the GB ops in the logical GB this should be called separately; + // otherwise GB evaluators and expr nodes may get shared among multiple GB ops + private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf hc) { + GBInfo gbInfo = new GBInfo(); + + // 0. Collect AggRel output col Names + gbInfo.outputColNames.addAll(aggRel.getRowType().getFieldNames()); + + // 1. 
Collect GB Keys + RelNode aggInputRel = aggRel.getInput(); + ExprNodeConverter exprConv = new ExprNodeConverter(inputOpAf.tabAlias, + aggInputRel.getRowType(), false, aggRel.getCluster().getTypeFactory()); + + ExprNodeDesc tmpExprNodeDesc; + for (int i : aggRel.getGroupSet()) { + RexInputRef iRef = new RexInputRef(i, (RelDataType) aggInputRel.getRowType().getFieldList() + .get(i).getType()); + tmpExprNodeDesc = iRef.accept(exprConv); + gbInfo.gbKeys.add(tmpExprNodeDesc); + gbInfo.gbKeyColNamesInInput.add(aggInputRel.getRowType().getFieldNames().get(i)); + gbInfo.gbKeyTypes.add(tmpExprNodeDesc.getTypeInfo()); + } + + // 2. Collect Grouping Set info + if (aggRel.indicator) { + // 2.1 Translate Grouping set col bitset + ImmutableList lstGrpSet = aggRel.getGroupSets(); + int bitmap = 0; + for (ImmutableBitSet grpSet : lstGrpSet) { + bitmap = 0; + for (Integer bitIdx : grpSet.asList()) { + bitmap = SemanticAnalyzer.setBit(bitmap, bitIdx); + } + gbInfo.grpSets.add(bitmap); + } + Collections.sort(gbInfo.grpSets); + + // 2.2 Check if GRpSet require additional MR Job + gbInfo.grpSetRqrAdditionalMRJob = gbInfo.grpSets.size() > hc + .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY); + + // 2.3 Check if GROUPING_ID needs to be projected out + if (!aggRel.getAggCallList().isEmpty() + && (aggRel.getAggCallList().get(aggRel.getAggCallList().size() - 1).getAggregation() == HiveGroupingID.INSTANCE)) { + gbInfo.grpIdFunctionNeeded = true; + } + } + + // 3. Walk through UDAF & Collect Distinct Info + Set distinctRefs = new HashSet(); + Map distParamInRefsToOutputPos = new HashMap(); + for (AggregateCall aggCall : aggRel.getAggCallList()) { + if ((aggCall.getAggregation() == HiveGroupingID.INSTANCE) || !aggCall.isDistinct()) { + continue; + } + + List argLst = new ArrayList(aggCall.getArgList()); + List argNames = HiveCalciteUtil.getFieldNames(argLst, aggInputRel); + ExprNodeDesc distinctExpr; + for (int i = 0; i < argLst.size(); i++) { + if (!distinctRefs.contains(argLst.get(i))) { + distinctRefs.add(argLst.get(i)); + distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size()); + distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv); + gbInfo.distExprNodes.add(distinctExpr); + gbInfo.distExprNames.add(argNames.get(i)); + gbInfo.distExprTypes.add(distinctExpr.getTypeInfo()); + } + } + } + + // 4. 
Walk through UDAF & Collect UDAF Info + Set deDupedNonDistIrefsSet = new HashSet(); + for (AggregateCall aggCall : aggRel.getAggCallList()) { + if (aggCall.getAggregation() == HiveGroupingID.INSTANCE) { + continue; + } + + UDAFAttrs udafAttrs = new UDAFAttrs(); + udafAttrs.udafParams.addAll(HiveCalciteUtil.getExprNodes(aggCall.getArgList(), aggInputRel, + inputOpAf.tabAlias)); + udafAttrs.udafName = aggCall.getAggregation().getName(); + udafAttrs.isDistinctUDAF = aggCall.isDistinct(); + List argLst = new ArrayList(aggCall.getArgList()); + List distColIndicesOfUDAF = new ArrayList(); + List distUDAFParamsIndxInDistExprs = new ArrayList(); + for (int i = 0; i < argLst.size(); i++) { + // NOTE: distinct expr can not be part of GB key (we assume plan + // gen would have prevented it) + if (udafAttrs.isDistinctUDAF) { + distColIndicesOfUDAF.add(distParamInRefsToOutputPos.get(argLst.get(i))); + distUDAFParamsIndxInDistExprs.add(distParamInRefsToOutputPos.get(argLst.get(i))); + } else { + // TODO: this seems wrong (following what Hive Regular does) + if (!distParamInRefsToOutputPos.containsKey(argLst.get(i)) + && !deDupedNonDistIrefsSet.contains(argLst.get(i))) { + deDupedNonDistIrefsSet.add(argLst.get(i)); + gbInfo.deDupedNonDistIrefs.add(udafAttrs.udafParams.get(i)); + } + } + } + + if (udafAttrs.isDistinctUDAF) { + gbInfo.containsDistinctAggr = true; + + udafAttrs.udafParamsIndxInGBInfoDistExprs = distUDAFParamsIndxInDistExprs; + gbInfo.distColIndices.add(distColIndicesOfUDAF); + } + try { + udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName, + new ArrayList(udafAttrs.udafParams), new ASTNode(), + udafAttrs.isDistinctUDAF, false); + } catch (SemanticException e) { + throw new RuntimeException(e); + } + gbInfo.udafAttrs.add(udafAttrs); + } + + // 5. Gather GB Memory threshold + gbInfo.groupByMemoryUsage = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); + gbInfo.memoryThreshold = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + + // 6. Gather GB physical pipeline (based on user config & Grouping Sets size) + gbInfo.gbPhysicalPipelineMode = getAggOPMode(hc, gbInfo); + + return gbInfo; + } + + static OpAttr translateGB(OpAttr inputOpAf, HiveAggregate aggRel, HiveConf hc) + throws SemanticException { + OpAttr translatedGBOpAttr = null; + GBInfo gbInfo = getGBInfo(aggRel, inputOpAf, hc); + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB: + translatedGBOpAttr = genMapSideGBNoSkewNoAddMRJob(inputOpAf, aggRel, gbInfo); + break; + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + translatedGBOpAttr = genMapSideGBNoSkewAddMRJob(inputOpAf, aggRel, gbInfo); + break; + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + translatedGBOpAttr = genMapSideGBSkewGBKeysOrDistUDAFPresent(inputOpAf, aggRel, gbInfo); + break; + case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT: + translatedGBOpAttr = genMapSideGBSkewGBKeysAndDistUDAFNotPresent(inputOpAf, aggRel, gbInfo); + break; + case NO_MAP_SIDE_GB_NO_SKEW: + translatedGBOpAttr = genNoMapSideGBNoSkew(inputOpAf, aggRel, gbInfo); + break; + case NO_MAP_SIDE_GB_SKEW: + translatedGBOpAttr = genNoMapSideGBSkew(inputOpAf, aggRel, gbInfo); + break; + } + + return translatedGBOpAttr; + } + + /** + * GB-RS-GB1 + * + * Construct the GB-RS-GB pipeline. User has enabled Map Side GB, specified no + * skew and Grp Set is below the threshold. 
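+ * + * For example (assuming hive.map.aggr=true, hive.groupby.skewindata=false and + * no grouping sets above the cardinality threshold), "select key, count(1) + * from t group by key" stays on this path: GB (hash, map side) -> RS + * (partitioned and sorted on key) -> GB (mergepartial).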
+ * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBNoSkewNoAddMRJob(OpAttr inputOpAf, HiveAggregate aggRel, + GBInfo gbInfo) throws SemanticException { + OpAttr mapSideGB = null; + OpAttr mapSideRS = null; + OpAttr reduceSideGB = null; + + // 1. Insert MapSide GB + mapSideGB = genMapSideGB(inputOpAf, gbInfo); + + // 2. Insert MapSide RS + mapSideRS = genMapSideGBRS(mapSideGB, gbInfo); + + // 3. Insert ReduceSide GB + reduceSideGB = genReduceSideGB1(mapSideRS, gbInfo, false, false, GroupByDesc.Mode.MERGEPARTIAL); + + return reduceSideGB; + } + + /** + * GB-RS-GB1-RS-GB2 + */ + private static OpAttr genGBRSGBRSGBOpPipeLine(OpAttr inputOpAf, HiveAggregate aggRel, + GBInfo gbInfo) throws SemanticException { + OpAttr mapSideGB = null; + OpAttr mapSideRS = null; + OpAttr reduceSideGB1 = null; + OpAttr reduceSideRS = null; + OpAttr reduceSideGB2 = null; + + // 1. Insert MapSide GB + mapSideGB = genMapSideGB(inputOpAf, gbInfo); + + // 2. Insert MapSide RS + mapSideRS = genMapSideGBRS(mapSideGB, gbInfo); + + // 3. Insert ReduceSide GB1 + boolean computeGrpSet = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT) ? false : true; + reduceSideGB1 = genReduceSideGB1(mapSideRS, gbInfo, computeGrpSet, false, GroupByDesc.Mode.PARTIALS); + + // 4. Insert RS on reduce side with Reduce side GB as input + reduceSideRS = genReduceGBRS(reduceSideGB1, gbInfo); + + // 5. Insert ReduceSide GB2 + reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo); + + return reduceSideGB2; + } + + /** + * GB-RS-GB1-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBNoSkewAddMRJob(OpAttr inputOpAf, HiveAggregate aggRel, + GBInfo gbInfo) throws SemanticException { + // 1. Sanity check + if (gbInfo.containsDistinctAggr) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + gbInfo.grpSets.size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_DISTINCTS.getMsg(errorMsg)); + } + + // 2. Gen GB-RS-GB-RS-GB pipeline + return genGBRSGBRSGBOpPipeLine(inputOpAf, aggRel, gbInfo); + } + + /** + * GB-RS-GB1-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBSkewGBKeysOrDistUDAFPresent(OpAttr inputOpAf, + HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException { + // 1. Sanity check + if (gbInfo.grpSetRqrAdditionalMRJob) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + gbInfo.grpSets.size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg)); + } + + // 2. Gen GB-RS-GB-RS-GB pipeline + return genGBRSGBRSGBOpPipeLine(inputOpAf, aggRel, gbInfo); + } + + /** + * GB-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBSkewGBKeysAndDistUDAFNotPresent(OpAttr inputOpAf, + HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException { + OpAttr mapSideGB = null; + OpAttr mapSideRS = null; + OpAttr reduceSideGB2 = null; + + // 1. 
Sanity check + if (gbInfo.grpSetRqrAdditionalMRJob) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + gbInfo.grpSets.size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg)); + } + + // 2. Insert MapSide GB + mapSideGB = genMapSideGB(inputOpAf, gbInfo); + + // 3. Insert MapSide RS + mapSideRS = genMapSideGBRS(mapSideGB, gbInfo); + + // 4. Insert ReduceSide GB2 + reduceSideGB2 = genReduceSideGB2(mapSideRS, gbInfo); + + return reduceSideGB2; + } + + /** + * RS-GB1 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genNoMapSideGBNoSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) + throws SemanticException { + OpAttr mapSideRS = null; + OpAttr reduceSideGB1NoMapGB = null; + + // 1. Insert MapSide RS + mapSideRS = genMapSideRS(inputOpAf, gbInfo); + + // 2. Insert ReduceSide GB + reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.COMPLETE); + + return reduceSideGB1NoMapGB; + } + + /** + * RS-GB1-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genNoMapSideGBSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) + throws SemanticException { + OpAttr mapSideRS = null; + OpAttr reduceSideGB1NoMapGB = null; + OpAttr reduceSideRS = null; + OpAttr reduceSideGB2 = null; + + // 1. Insert MapSide RS + mapSideRS = genMapSideRS(inputOpAf, gbInfo); + + // 2. Insert ReduceSide GB + reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.PARTIAL1); + + // 3. Insert RS on reduce side with Reduce side GB as input + reduceSideRS = genReduceGBRS(reduceSideGB1NoMapGB, gbInfo); + + // 4. 
Insert ReduceSide GB2 + reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo); + + return reduceSideGB2; + } + + private static int getParallelismForReduceSideRS(GBInfo gbInfo) { + int degreeOfParallelism = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + case NO_MAP_SIDE_GB_SKEW: + if (gbInfo.gbKeys.isEmpty()) { + degreeOfParallelism = 1; + } else { + degreeOfParallelism = -1; + } + break; + default: + throw new RuntimeException( + "Unable to determine Reducer Parallelism - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return degreeOfParallelism; + } + + private static int getParallelismForMapSideRS(GBInfo gbInfo) { + int degreeOfParallelism = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB: + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + case NO_MAP_SIDE_GB_NO_SKEW: + if (gbInfo.gbKeys.isEmpty()) { + degreeOfParallelism = 1; + } else { + degreeOfParallelism = -1; + } + break; + case NO_MAP_SIDE_GB_SKEW: + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + degreeOfParallelism = -1; + break; + case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT: + degreeOfParallelism = 1; + break; + default: + throw new RuntimeException( + "Unable to determine Reducer Parallelism - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return degreeOfParallelism; + } + + private static int getNumPartFieldsForReduceSideRS(GBInfo gbInfo) { + int numPartFields = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + numPartFields = gbInfo.gbKeys.size() + 1; + break; + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + case NO_MAP_SIDE_GB_SKEW: + numPartFields = gbInfo.gbKeys.size(); + break; + default: + throw new RuntimeException( + "Unable to determine Number of Partition Fields - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return numPartFields; + } + + private static int getNumPartFieldsForMapSideRS(GBInfo gbInfo) { + int numPartFields = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB: + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT: + case NO_MAP_SIDE_GB_NO_SKEW: + numPartFields += gbInfo.gbKeys.size(); + break; + case NO_MAP_SIDE_GB_SKEW: + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + if (gbInfo.containsDistinctAggr) { + numPartFields = Integer.MAX_VALUE; + } else { + numPartFields = -1; + } + break; + default: + throw new RuntimeException( + "Unable to determine Number of Partition Fields - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return numPartFields; + } + + private static boolean inclGrpSetInReduceSide(GBInfo gbInfo) { + boolean inclGrpSet = false; + + if (gbInfo.grpSets.size() > 0 + && (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB || gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT)) { + inclGrpSet = true; + } + + return inclGrpSet; + } + + private static boolean inclGrpSetInMapSide(GBInfo gbInfo) { + boolean inclGrpSet = false; + + if (gbInfo.grpSets.size() > 0 + && ((gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB) || + gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT)) { + inclGrpSet = true; + } + + return inclGrpSet; + } + + private static OpAttr genReduceGBRS(OpAttr inputOpAf, 
GBInfo gbInfo) throws SemanticException { + Map colExprMap = new HashMap(); + ArrayList outputColumnNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0); + List gb1ColInfoLst = reduceSideGB1.getSchema().getSignature(); + + ArrayList reduceKeys = getReduceKeysForRS(reduceSideGB1, 0, + gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap, true, true); + if (inclGrpSetInReduceSide(gbInfo)) { + addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true, reduceKeys, + outputColumnNames, colInfoLst, colExprMap); + } + + ArrayList reduceValues = getValueKeysForRS(reduceSideGB1, reduceSideGB1.getConf() + .getKeys().size(), outputColumnNames, colInfoLst, colExprMap, true, true); + + ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils + .getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, + getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo), + AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), reduceSideGB1); + + rsOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsOp); + } + + private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException { + Map colExprMap = new HashMap(); + List outputKeyColumnNames = new ArrayList(); + List outputValueColumnNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0); + int distColStartIndx = gbInfo.gbKeys.size() + (gbInfo.grpSets.size() > 0 ? 1 : 0); + + ArrayList reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1, + outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); + int keyLength = reduceKeys.size(); + + if (inclGrpSetInMapSide(gbInfo)) { + addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, + reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap); + keyLength++; + } + if (mapGB.getConf().getKeys().size() > reduceKeys.size()) { + // NOTE: All dist cols have single output col name; + reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys() + .size() - 1, outputKeyColumnNames, true, colInfoLst, colExprMap, false, false)); + } + + ArrayList reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys() + .size(), outputValueColumnNames, colInfoLst, colExprMap, false, false); + List> distinctColIndices = getDistColIndices(gbInfo, distColStartIndx); + + ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils + .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, distinctColIndices, + outputKeyColumnNames, outputValueColumnNames, true, -1, + getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo), + AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB); + + rsOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsOp); + } + + private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException { + Map colExprMap = new HashMap(); + List outputKeyColumnNames = new ArrayList(); + List outputValueColumnNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + int distColStartIndx = gbInfo.gbKeys.size() + (gbInfo.grpSets.size() > 0 ? 1 : 0); + String outputColName; + + // 1. 
Add GB Keys to reduce keys + ArrayList reduceKeys = getReduceKeysForRS(inputOpAf.inputs.get(0), 0, + gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); + int keyLength = reduceKeys.size(); + + // 2. Add Dist UDAF args to reduce keys + if (gbInfo.containsDistinctAggr) { + // TODO: Why is this needed (doesn't represent any cols) + String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size()); + outputKeyColumnNames.add(udafName); + for (int i = 0; i < gbInfo.distExprNodes.size(); i++) { + reduceKeys.add(gbInfo.distExprNodes.get(i)); + outputColName = SemanticAnalyzer.getColumnInternalName(i); + String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "." + + outputColName; + ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, + false); + colInfoLst.add(colInfo); + colExprMap.put(field, gbInfo.distExprNodes.get(i)); + } + } + + // 3. Add UDAF args deduped to reduce values + ArrayList reduceValues = new ArrayList(); + for (int i = 0; i < gbInfo.deDupedNonDistIrefs.size(); i++) { + reduceValues.add(gbInfo.deDupedNonDistIrefs.get(i)); + outputColName = SemanticAnalyzer.getColumnInternalName(reduceValues.size() - 1); + outputValueColumnNames.add(outputColName); + String field = Utilities.ReduceField.VALUE.toString() + "." + outputColName; + colInfoLst.add(new ColumnInfo(field, reduceValues.get(reduceValues.size() - 1).getTypeInfo(), + null, false)); + colExprMap.put(field, reduceValues.get(reduceValues.size() - 1)); + } + + // 4. Gen RS + ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils + .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, + getDistColIndices(gbInfo, distColStartIndx), outputKeyColumnNames, + outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo), + getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema( + colInfoLst), inputOpAf.inputs.get(0)); + + rsOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsOp); + } + + private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + String colOutputName = null; + ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0); + List rsColInfoLst = rs.getSchema().getSignature(); + ColumnInfo ci; + + // 1. 
Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, + gbInfo.gbKeys.size() - 1, false, false); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + ci = rsColInfoLst.get(i); + colOutputName = gbInfo.outputColNames.get(i); + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + // 1.2 Add GrpSet Col + int groupingSetsPosition = -1; + if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) { + groupingSetsPosition = gbKeys.size(); + ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false); + gbKeys.add(grpSetColExpr); + colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1); + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true)); + colExprMap.put(colOutputName, grpSetColExpr); + } + + // 2. Add UDAF + UDAFAttrs udafAttr; + ArrayList aggregations = new ArrayList(); + int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() + : gbInfo.gbKeys.size() * 2; + int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1; + + for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { + udafAttr = gbInfo.udafAttrs.get(i); + ArrayList aggParameters = new ArrayList(); + aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i))); + colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i); + outputColNames.add(colOutputName); + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, + udafAttr.isDistinctUDAF); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, + aggParameters); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode)); + colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); + } + + Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, + outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, + gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), + new RowSchema(colInfoLst), rs); + + rsGBOp2.setColumnExprMap(colExprMap); + + // TODO: Shouldn't we propagate vc? Is it the vc col from the table, or all vcs? + return new OpAttr("", new HashMap(), rsGBOp2); + } + + private static OpAttr genReduceSideGB1(OpAttr inputOpAf, GBInfo gbInfo, boolean computeGrpSet, + boolean propagateConstInDistinctUDAF, GroupByDesc.Mode gbMode) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + String colOutputName = null; + ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0); + List rsColInfoLst = rs.getSchema().getSignature(); + ColumnInfo ci; + boolean finalGB = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB); + + // 1. 
Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, + gbInfo.gbKeys.size() - 1, false, false); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + ci = rsColInfoLst.get(i); + if (finalGB) { + colOutputName = gbInfo.outputColNames.get(i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(i); + } + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + + // 1.2 Add GrpSet Col + int groupingSetsColPosition = -1; + if ((!finalGB && gbInfo.grpSets.size() > 0) || (finalGB && gbInfo.grpIdFunctionNeeded)) { + groupingSetsColPosition = gbInfo.gbKeys.size(); + if (computeGrpSet) { + // GrpSet Col needs to be constructed + gbKeys.add(new ExprNodeConstantDesc("0")); + } else { + // GrpSet Col already part of input RS + // TODO: Can't we just copy the ExprNodeDEsc from input (Do we need to + // explicitly set table alias to null & VC to false + gbKeys.addAll(ExprNodeDescUtils.genExprNodeDesc(rs, groupingSetsColPosition, + groupingSetsColPosition, false, true)); + } + + colOutputName = SemanticAnalyzer.getColumnInternalName(groupingSetsColPosition); + if (finalGB) { + colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1); + } + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true)); + colExprMap.put(colOutputName, gbKeys.get(groupingSetsColPosition)); + } + + // 2. Walk through UDAF and add them to GB + String lastReduceKeyColName = null; + if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) { + lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames() + .get(rs.getConf().getOutputKeyColumnNames().size() - 1); + } + int numDistinctUDFs = 0; + int distinctStartPosInReduceKeys = gbKeys.size(); + List reduceValues = rs.getConf().getValueCols(); + ArrayList aggregations = new ArrayList(); + int udafColStartPosInOriginalGB = (gbInfo.grpSets.size() > 0) ? gbInfo.gbKeys.size() * 2 + : gbInfo.gbKeys.size(); + int udafColStartPosInRS = rs.getConf().getKeyCols().size(); + for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { + UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i); + ArrayList aggParameters = new ArrayList(); + + if (udafAttr.isDistinctUDAF) { + ColumnInfo rsDistUDAFParamColInfo; + ExprNodeDesc distinctUDAFParam; + ExprNodeDesc constantPropDistinctUDAFParam; + for (int j = 0; j < udafAttr.udafParamsIndxInGBInfoDistExprs.size(); j++) { + rsDistUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j); + String rsDistUDAFParamName = rsDistUDAFParamColInfo.getInternalName(); + // TODO: verify if this is needed + if (lastReduceKeyColName != null) { + rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + + ":" + numDistinctUDFs + "." 
+ SemanticAnalyzer.getColumnInternalName(j); + } + distinctUDAFParam = new ExprNodeColumnDesc(rsDistUDAFParamColInfo.getType(), + rsDistUDAFParamName, rsDistUDAFParamColInfo.getTabAlias(), + rsDistUDAFParamColInfo.getIsVirtualCol()); + if (propagateConstInDistinctUDAF) { + // TODO: Implement propConstDistUDAFParams + constantPropDistinctUDAFParam = SemanticAnalyzer + .isConstantParameterInAggregationParameters( + rsDistUDAFParamColInfo.getInternalName(), reduceValues); + if (constantPropDistinctUDAFParam != null) { + distinctUDAFParam = constantPropDistinctUDAFParam; + } + } + aggParameters.add(distinctUDAFParam); + } + numDistinctUDFs++; + } else { + aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafColStartPosInRS + i))); + } + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, + aggParameters); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, + (gbMode != GroupByDesc.Mode.FINAL && udafAttr.isDistinctUDAF), udafMode)); + + if (finalGB) { + colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() + - 1); + } + + colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); + outputColNames.add(colOutputName); + } + + // Nothing special needs to be done for grouping sets if this is the final + // group by operator and multiple rows corresponding to the grouping sets + // have been generated upstream. However, if an additional MR job has been + // created to handle grouping sets, additional rows corresponding to grouping + // sets need to be created here. + // TODO: Clean up/refactor assumptions + boolean includeGrpSetInGBDesc = (gbInfo.grpSets.size() > 0) + && !finalGB + && !(gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT); + Operator rsGBOp = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, + gbKeys, aggregations, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, gbInfo.grpSets, + includeGrpSetInGBDesc, groupingSetsColPosition, + gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs); + + rsGBOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsGBOp); + } + + /** + * RS-GB0 + * + * @param inputOpAf + * @param gbInfo + * @param gbMode + * @return + * @throws SemanticException + */ + private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo, + GroupByDesc.Mode gbMode) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + String colOutputName = null; + ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0); + List rsColInfoLst = rs.getSchema().getSignature(); + ColumnInfo ci; + boolean useOriginalGBNames = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW); + + // 1. 
Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, + gbInfo.gbKeys.size() - 1, true, false); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + ci = rsColInfoLst.get(i); + if (useOriginalGBNames) { + colOutputName = gbInfo.outputColNames.get(i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(i); + } + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), null, false)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + + // 2. Walk through UDAF and add them to GB + String lastReduceKeyColName = null; + if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) { + lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames() + .get(rs.getConf().getOutputKeyColumnNames().size() - 1); + } + int numDistinctUDFs = 0; + int distinctStartPosInReduceKeys = gbKeys.size(); + List reduceValues = rs.getConf().getValueCols(); + ArrayList aggregations = new ArrayList(); + int udafColStartPosInOriginalGB = gbInfo.gbKeys.size(); + for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { + UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i); + ArrayList aggParameters = new ArrayList(); + + ColumnInfo rsUDAFParamColInfo; + ExprNodeDesc udafParam; + ExprNodeDesc constantPropDistinctUDAFParam; + for (int j = 0; j < udafAttr.udafParams.size(); j++) { + rsUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j); + String rsUDAFParamName = rsUDAFParamColInfo.getInternalName(); + // TODO: verify if this is needed + if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) { + rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j); + } + udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName, + rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol()); + constantPropDistinctUDAFParam = SemanticAnalyzer + .isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), + reduceValues); + if (constantPropDistinctUDAFParam != null) { + udafParam = constantPropDistinctUDAFParam; + } + aggParameters.add(udafParam); + } + + if (udafAttr.isDistinctUDAF) { + numDistinctUDFs++; + } + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, + aggParameters); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode)); + if (useOriginalGBNames) { + colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() + - 1); + } + + colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); + outputColNames.add(colOutputName); + } + + Operator rsGB1 = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, + gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, + false, -1, numDistinctUDFs > 0), new RowSchema(colInfoLst), rs); + rsGB1.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsGB1); + } + + @SuppressWarnings("unchecked") + private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList 
colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + Set gbKeyColsAsNamesFrmIn = new HashSet(); + String colOutputName = null; + + // 1. Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = new ArrayList(); + for (int i = 0; i < gbAttrs.gbKeys.size(); i++) { + gbKeys.add(gbAttrs.gbKeys.get(i)); + colOutputName = SemanticAnalyzer.getColumnInternalName(i); + colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false)); + outputColNames.add(colOutputName); + gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + // 1.2. Adjust GroupingSet Position, GBKeys for GroupingSet Position if + // needed. NOTE: GroupingID is added to map side GB only if the GrpSet + // doesn't require additional MR Jobs + int groupingSetsPosition = -1; + boolean inclGrpID = inclGrpSetInMapSide(gbAttrs); + if (inclGrpID) { + groupingSetsPosition = gbKeys.size(); + addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap); + } + // 1.3. Add all distinct params + // NOTE: distinct expr can not be part of GB key (we assume plan + // gen would have prevented it) + for (int i = 0; i < gbAttrs.distExprNodes.size(); i++) { + if (!gbKeyColsAsNamesFrmIn.contains(gbAttrs.distExprNames.get(i))) { + gbKeys.add(gbAttrs.distExprNodes.get(i)); + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() - 1); + colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.distExprTypes.get(i), "", false)); + outputColNames.add(colOutputName); + gbKeyColsAsNamesFrmIn.add(gbAttrs.distExprNames.get(i)); + colExprMap.put(colOutputName, gbKeys.get(gbKeys.size() - 1)); + } + } + + // 2. Build Aggregations + ArrayList aggregations = new ArrayList(); + for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) { + Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, + udafAttr.isDistinctUDAF); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator, + udafAttr.udafParams, udafAttr.isDistinctUDAF, amode)); + GenericUDAFInfo udafInfo; + try { + udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, amode, + udafAttr.udafParams); + } catch (SemanticException e) { + throw new RuntimeException(e); + } + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() + - 1); + colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false)); + outputColNames.add(colOutputName); + } + + // 3. Create GB + @SuppressWarnings("rawtypes") + Operator gbOp = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, + outputColNames, gbKeys, aggregations, false, gbAttrs.groupByMemoryUsage, + gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, + gbAttrs.containsDistinctAggr), new RowSchema(colInfoLst), inputOpAf.inputs.get(0)); + + // 4. 
Setup Expr Col Map + // NOTE: UDAF is not included in ExprColMap + gbOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), gbOp); + } + + private static void addGrpSetCol(boolean createConstantExpr, String grpSetIDExprName, + boolean addReducePrefixToColInfoName, List exprLst, + List outputColumnNames, List colInfoLst, + Map colExprMap) throws SemanticException { + String outputColName = null; + ExprNodeDesc grpSetColExpr = null; + + if (createConstantExpr) { + grpSetColExpr = new ExprNodeConstantDesc("0"); + } else { + grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, grpSetIDExprName, + null, false); + } + exprLst.add(grpSetColExpr); + + outputColName = SemanticAnalyzer.getColumnInternalName(exprLst.size() - 1); + outputColumnNames.add(outputColName); + String internalColName = outputColName; + if (addReducePrefixToColInfoName) { + internalColName = Utilities.ReduceField.KEY.toString() + "." + outputColName; + } + colInfoLst.add(new ColumnInfo(internalColName, grpSetColExpr.getTypeInfo(), null, true)); + colExprMap.put(internalColName, grpSetColExpr); + } + + /** + * Get Reduce Keys for RS following MapSide GB + * + * @param reduceKeys + * assumed to be deduped list of exprs + * @param outputKeyColumnNames + * @param colExprMap + * @return List of ExprNodeDesc of ReduceKeys + * @throws SemanticException + */ + private static ArrayList getReduceKeysForRS(Operator inOp, int startPos, + int endPos, List outputKeyColumnNames, boolean addOnlyOneKeyColName, + ArrayList colInfoLst, Map colExprMap, + boolean addEmptyTabAlias, boolean setColToNonVirtual) throws SemanticException { + ArrayList reduceKeys = null; + if (endPos < 0) { + reduceKeys = new ArrayList(); + } else { + reduceKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, startPos, endPos, addEmptyTabAlias, + setColToNonVirtual); + int outColNameIndx = startPos; + for (int i = 0; i < reduceKeys.size(); ++i) { + String outputColName = SemanticAnalyzer.getColumnInternalName(outColNameIndx); + outColNameIndx++; + if (!addOnlyOneKeyColName || i == 0) { + outputKeyColumnNames.add(outputColName); + } + + // TODO: Verify if this is needed (Why can't it be always null/empty + String tabAlias = addEmptyTabAlias ? "" : null; + ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + + outputColName, reduceKeys.get(i).getTypeInfo(), tabAlias, false); + colInfoLst.add(colInfo); + colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i)); + } + } + + return reduceKeys; + } + + /** + * Get Value Keys for RS following MapSide GB + * + * @param GroupByOperator + * MapSide GB + * @param outputKeyColumnNames + * @param colExprMap + * @return List of ExprNodeDesc of Values + * @throws SemanticException + */ + private static ArrayList getValueKeysForRS(Operator inOp, int aggStartPos, + List outputKeyColumnNames, ArrayList colInfoLst, + Map colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual) + throws SemanticException { + List mapGBColInfoLst = inOp.getSchema().getSignature(); + ArrayList valueKeys = null; + if (aggStartPos >= mapGBColInfoLst.size()) { + valueKeys = new ArrayList(); + } else { + valueKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, aggStartPos, mapGBColInfoLst.size() - 1, + true, setColToNonVirtual); + for (int i = 0; i < valueKeys.size(); ++i) { + String outputColName = SemanticAnalyzer.getColumnInternalName(i); + outputKeyColumnNames.add(outputColName); + // TODO: Verify if this is needed (Why can't it be always null/empty + String tabAlias = addEmptyTabAlias ? 
"" : null; + ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + + outputColName, valueKeys.get(i).getTypeInfo(), tabAlias, false); + colInfoLst.add(colInfo); + colExprMap.put(colInfo.getInternalName(), valueKeys.get(i)); + } + } + + return valueKeys; + } + + private static List> getDistColIndices(GBInfo gbAttrs, int distOffSet) + throws SemanticException { + List> distColIndices = new ArrayList>(); + + for (List udafDistCols : gbAttrs.distColIndices) { + List udfAdjustedDistColIndx = new ArrayList(); + for (Integer distIndx : udafDistCols) { + udfAdjustedDistColIndx.add(distIndx + distOffSet); + } + distColIndices.add(udfAdjustedDistColIndx); + } + + return distColIndices; + } + + // TODO: Implement this + private static ExprNodeDesc propConstDistUDAFParams() { + return null; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java (working copy) @@ -24,58 +24,89 @@ import java.util.LinkedList; import java.util.List; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.rex.RexWindow; +import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeUtil; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec; +import 
org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* * convert a RexNode to an ExprNodeDesc */ public class ExprNodeConverter extends RexVisitorImpl { - RelDataType rType; String tabAlias; + String columnAlias; + RelDataType inputRowType; + RelDataType outputRowType; boolean partitioningExpr; + WindowFunctionSpec wfs; private final RelDataTypeFactory dTFactory; - public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr, RelDataTypeFactory dTFactory) { + public ExprNodeConverter(String tabAlias, RelDataType inputRowType, + boolean partitioningExpr, RelDataTypeFactory dTFactory) { + this(tabAlias, null, inputRowType, null, partitioningExpr, dTFactory); + } + + public ExprNodeConverter(String tabAlias, String columnAlias, RelDataType inputRowType, + RelDataType outputRowType, boolean partitioningExpr, RelDataTypeFactory dTFactory) { super(true); - /* - * hb: 6/25/14 for now we only support expressions that only contain - * partition cols. there is no use case for supporting generic expressions. - * for supporting generic exprs., we need to give the converter information - * on whether a column is a partition column or not, whether a column is a - * virtual column or not. - */ - assert partitioningExpr == true; this.tabAlias = tabAlias; - this.rType = rType; + this.columnAlias = columnAlias; + this.inputRowType = inputRowType; + this.outputRowType = outputRowType; this.partitioningExpr = partitioningExpr; this.dTFactory = dTFactory; } + public WindowFunctionSpec getWindowFunctionSpec() { + return this.wfs; + } + @Override public ExprNodeDesc visitInputRef(RexInputRef inputRef) { - RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); + RelDataTypeField f = inputRowType.getFieldList().get(inputRef.getIndex()); return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), f.getName(), tabAlias, partitioningExpr); } + /** + * TODO: Handle 1) cast 2) Field Access 3) Windowing Over() 4, Windowing Agg Call + */ @Override public ExprNodeDesc visitCall(RexCall call) { ExprNodeGenericFuncDesc gfDesc = null; @@ -123,6 +154,9 @@ return gfDesc; } + /** + * TODO: 1. 
Handle NULL + */ @Override public ExprNodeDesc visitLiteral(RexLiteral literal) { RelDataType lType = literal.getType(); @@ -176,4 +210,138 @@ } } + @Override + public ExprNodeDesc visitOver(RexOver over) { + if (!deep) { + return null; } + + final RexWindow window = over.getWindow(); + + final WindowSpec windowSpec = new WindowSpec(); + final PartitioningSpec partitioningSpec = getPSpec(window); + windowSpec.setPartitioning(partitioningSpec); + final WindowFrameSpec windowFrameSpec = getWindowRange(window); + windowSpec.setWindowFrame(windowFrameSpec); + + wfs = new WindowFunctionSpec(); + wfs.setWindowSpec(windowSpec); + final Schema schema = new Schema(tabAlias, inputRowType.getFieldList()); + final ASTNode wUDAFAst = new ASTConverter.RexVisitor(schema).visitOver(over); + wfs.setExpression(wUDAFAst); + ASTNode nameNode = (ASTNode) wUDAFAst.getChild(0); + wfs.setName(nameNode.getText()); + for(int i=1; i < wUDAFAst.getChildCount()-1; i++) { + ASTNode child = (ASTNode) wUDAFAst.getChild(i); + wfs.addArg(child); + } + wfs.setAlias(columnAlias); + + RelDataTypeField f = outputRowType.getField(columnAlias, false, false); + return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), columnAlias, tabAlias, + partitioningExpr); + } + + private PartitioningSpec getPSpec(RexWindow window) { + PartitioningSpec partitioning = new PartitioningSpec(); + + if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) { + PartitionSpec pSpec = new PartitionSpec(); + for (RexNode pk : window.partitionKeys) { + PartitionExpression exprSpec = new PartitionExpression(); + RexInputRef inputRef = (RexInputRef) pk; + RelDataTypeField f = inputRowType.getFieldList().get(inputRef.getIndex()); + ASTNode astCol; + if (tabAlias == null || tabAlias.isEmpty()) { + astCol = ASTBuilder.unqualifiedName(f.getName()); + } else { + astCol = ASTBuilder.qualifiedName(tabAlias, f.getName()); + } + exprSpec.setExpression(astCol); + pSpec.addExpression(exprSpec); + } + partitioning.setPartSpec(pSpec); + } + + if (window.orderKeys != null && !window.orderKeys.isEmpty()) { + OrderSpec oSpec = new OrderSpec(); + for (RexFieldCollation ok : window.orderKeys) { + OrderExpression exprSpec = new OrderExpression(); + Order order = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? 
+ Order.ASC : Order.DESC; + exprSpec.setOrder(order); + RexInputRef inputRef = (RexInputRef) ok.left; + RelDataTypeField f = inputRowType.getFieldList().get(inputRef.getIndex()); + ASTNode astCol; + if (tabAlias == null || tabAlias.isEmpty()) { + astCol = ASTBuilder.unqualifiedName(f.getName()); + } else { + astCol = ASTBuilder.qualifiedName(tabAlias, f.getName()); + } + exprSpec.setExpression(astCol); + oSpec.addExpression(exprSpec); + } + partitioning.setOrderSpec(oSpec); + } + + return partitioning; + } + + private WindowFrameSpec getWindowRange(RexWindow window) { + // NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical) + + WindowFrameSpec windowFrame = new WindowFrameSpec(); + + BoundarySpec start = null; + RexWindowBound ub = window.getUpperBound(); + if (ub != null) { + start = getWindowBound(ub, window.isRows()); + } + + BoundarySpec end = null; + RexWindowBound lb = window.getLowerBound(); + if (lb != null) { + end = getWindowBound(lb, window.isRows()); + } + + if (start != null || end != null) { + if (start != null) { + windowFrame.setStart(start); + } + if (end != null) { + windowFrame.setEnd(end); + } + } + + return windowFrame; + } + + private BoundarySpec getWindowBound(RexWindowBound wb, boolean isRows) { + BoundarySpec boundarySpec; + + if (wb.isCurrentRow()) { + boundarySpec = new CurrentRowSpec(); + } else { + final Direction direction; + final int amt; + if (wb.isPreceding()) { + direction = Direction.PRECEDING; + } else { + direction = Direction.FOLLOWING; + } + if (wb.isUnbounded()) { + amt = BoundarySpec.UNBOUNDED_AMOUNT; + } else { + amt = RexLiteral.intValue(wb.getOffset()); + } + if (isRows) { + boundarySpec = new RangeBoundarySpec(direction, amt); + } else { + boundarySpec = new ValueBoundarySpec(direction, amt); + } + } + + return boundarySpec; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -68,7 +69,7 @@ // However in HIVE DB name can not appear in select list; in case of join // where table names differ only in DB name, Hive would require user // introducing explicit aliases for tbl. 
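+    // Take the alias from the HiveTableScan operator itself rather than from the
+    // shared RelOptHiveTable: the per-scan alias is what distinguishes two scans
+    // of the same table (e.g. in a self-join); see PlanModifierForASTConv below.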
- b.add(HiveParser.Identifier, hTbl.getTableAlias()); + b.add(HiveParser.Identifier, ((HiveTableScan)scan).getTableAlias()); return b.node(); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java (working copy) @@ -50,9 +50,11 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import com.google.common.collect.ImmutableList; @@ -95,6 +97,23 @@ return newTopNode; } + private static String getTblAlias(RelNode rel) { + + if (null == rel) { + return null; + } + if (rel instanceof HiveTableScan) { + return ((HiveTableScan)rel).getTableAlias(); + } + if (rel instanceof Project) { + return null; + } + if (rel.getInputs().size() == 1) { + return getTblAlias(rel.getInput(0)); + } + return null; + } + private static void convertOpTree(RelNode rel, RelNode parent) { if (rel instanceof HepRelVertex) { @@ -103,6 +122,12 @@ if (!validJoinParent(rel, parent)) { introduceDerivedTable(rel, parent); } + String leftChild = getTblAlias(((Join)rel).getLeft()); + if (null != leftChild && leftChild.equalsIgnoreCase(getTblAlias(((Join)rel).getRight()))) { + // introduce derived table above one child, if this is a self-join, + // since user-provided aliases are lost at this point.
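+ // Renaming just one side is sufficient: the derived table wraps the left
+ // child under a fresh alias, so the two scans can be told apart when the
+ // AST is regenerated.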
+ introduceDerivedTable(((Join)rel).getLeft(), rel); + } } else if (rel instanceof MultiJoin) { throw new RuntimeException("Found MultiJoin"); } else if (rel instanceof RelSubset) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java (working copy) @@ -54,11 +54,13 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -69,8 +71,8 @@ public class ASTConverter { private static final Log LOG = LogFactory.getLog(ASTConverter.class); - private RelNode root; - private HiveAST hiveAST; + private final RelNode root; + private final HiveAST hiveAST; private RelNode from; private Filter where; private Aggregate groupBy; @@ -213,7 +215,7 @@ private void convertLimitToASTNode(HiveSort limit) { if (limit != null) { - HiveSort hiveLimit = (HiveSort) limit; + HiveSort hiveLimit = limit; RexNode limitExpr = hiveLimit.getFetchExpr(); if (limitExpr != null) { Object val = ((RexLiteral) limitExpr).getValue2(); @@ -224,12 +226,12 @@ private void convertOBToASTNode(HiveSort order) { if (order != null) { - HiveSort hiveSort = (HiveSort) order; + HiveSort hiveSort = order; if (!hiveSort.getCollation().getFieldCollations().isEmpty()) { // 1 Add order by token ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - schema = new Schema((HiveSort) hiveSort); + schema = new Schema(hiveSort); Map obRefToCallMap = hiveSort.getInputRefToCallMap(); RexNode obExpr; ASTNode astCol; @@ -370,7 +372,7 @@ static class RexVisitor extends RexVisitorImpl { private final Schema schema; - private boolean useTypeQualInLiteral; + private final boolean useTypeQualInLiteral; protected RexVisitor(Schema schema) { this(schema, false); @@ -567,7 +569,7 @@ private static final long serialVersionUID = 1L; Schema(TableScan scan) { - String tabName = ((RelOptHiveTable) scan.getTable()).getTableAlias(); + String tabName = ((HiveTableScan) scan).getTableAlias(); for (RelDataTypeField field : scan.getRowType().getFieldList()) { add(new ColumnInfo(tabName, field.getName())); } @@ -641,7 +643,13 @@ add(new ColumnInfo(null, projName)); } } + + public Schema(String tabAlias, List fieldList) { + for (RelDataTypeField field : fieldList) { + add(new ColumnInfo(tabAlias, field.getName())); } + } + } /* * represents Column information exposed by a QueryBlock. 
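A note on the new Schema(tabAlias, fieldList) constructor above: it pairs with ExprNodeConverter.visitOver earlier in this patch, which builds a Schema over the operator's input row type and re-renders the RexOver call as a Hive AST subtree. A minimal sketch of that interplay (the alias "t" is hypothetical):

  // One ColumnInfo per input field, all qualified by the same alias.
  Schema schema = new Schema("t", inputRowType.getFieldList());
  // Re-render the windowed call, e.g. rank(_col1) OVER (...), as an AST node:
  // child 0 is the function name, the children between the name and the last
  // child are its arguments, and the last child carries the window spec,
  // which is how visitOver slices the subtree apart.
  ASTNode wUDAFAst = new ASTConverter.RexVisitor(schema).visitOver(over);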
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java (revision 1673601) @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Exchange; +import org.apache.calcite.rel.core.Join; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; + +import com.google.common.collect.ImmutableList; + +/** Not an optimization rule. + * Rule to aid in translation from Calcite tree -> Hive tree. + * Transforms: + * Left Right Left Right + * \ / -> \ / + * Join HashExchange HashExchange + * \ / + * Join + */ +public class HiveInsertExchange4JoinRule extends RelOptRule { + + protected static transient final Log LOG = LogFactory + .getLog(HiveInsertExchange4JoinRule.class); + + public HiveInsertExchange4JoinRule() { + // match join with exactly 2 inputs + super(RelOptRule.operand(Join.class, + operand(RelNode.class, any()), + operand(RelNode.class, any()))); + } + + @Override + public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + + if (call.rel(1) instanceof Exchange && + call.rel(2) instanceof Exchange) { + return; + } + + JoinPredicateInfo joinPredInfo = + HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + + // Get key columns from inputs. Those are the columns we will distribute on; + // they are also the columns we will sort on.
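+    // Worked example (hypothetical query): for
+    //   SELECT ... FROM t1 JOIN t2 ON t1.a = t2.x AND t1.b = t2.y
+    // the loop below collects joinLeftKeyPositions = [pos(a), pos(b)] and
+    // joinRightKeyPositions = [pos(x), pos(y)], along with one RelFieldCollation
+    // per key, so each new HiveSortExchange hash-distributes and sorts its input
+    // on exactly the equi-join keys of that side.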
+ List joinLeftKeyPositions = new ArrayList(); + List joinRightKeyPositions = new ArrayList(); + ImmutableList.Builder leftCollationListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder rightCollationListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. + getEquiJoinPredicateElements().get(i); + joinLeftKeyPositions.addAll(joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()) { + leftCollationListBuilder.add(new RelFieldCollation(leftPos)); + } + joinRightKeyPositions.addAll(joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()); + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()) { + rightCollationListBuilder.add(new RelFieldCollation(rightPos)); + } + } + + HiveSortExchange left = HiveSortExchange.create(join.getLeft(), + new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinLeftKeyPositions), + new HiveRelCollation(leftCollationListBuilder.build())); + HiveSortExchange right = HiveSortExchange.create(join.getRight(), + new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinRightKeyPositions), + new HiveRelCollation(rightCollationListBuilder.build())); + + Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), + left, right, join.getJoinType(), join.isSemiJoinDone()); + + call.getPlanner().onCopy(join, newJoin); + + call.transformTo(newJoin); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java (revision 1673601) @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +import com.google.common.collect.ImmutableList; + +public final class HiveJoinAddNotNullRule extends RelOptRule { + + private static final String NOT_NULL_FUNC_NAME = "isnotnull"; + + /** The singleton. */ + public static final HiveJoinAddNotNullRule INSTANCE = + new HiveJoinAddNotNullRule(HiveFilter.DEFAULT_FILTER_FACTORY); + + private final FilterFactory filterFactory; + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveJoinAddNotNullRule. + */ + public HiveJoinAddNotNullRule(FilterFactory filterFactory) { + super(operand(Join.class, + operand(RelNode.class, any()), + operand(RelNode.class, any()))); + this.filterFactory = filterFactory; + } + + //~ Methods ---------------------------------------------------------------- + + public void onMatch(RelOptRuleCall call) { + final Join join = call.rel(0); + RelNode leftInput = call.rel(1); + RelNode rightInput = call.rel(2); + + if (join.getJoinType() != JoinRelType.INNER) { + return; + } + + if (join.getCondition().isAlwaysTrue()) { + return; + } + + JoinPredicateInfo joinPredInfo = + HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + + Set<Integer> joinLeftKeyPositions = new HashSet<Integer>(); + Set<Integer> joinRightKeyPositions = new HashSet<Integer>(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.
+ getEquiJoinPredicateElements().get(i); + joinLeftKeyPositions.addAll(joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()); + joinRightKeyPositions.addAll(joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()); + } + + // Build not null conditions + final RelOptCluster cluster = join.getCluster(); + final RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + + final Map newLeftConditions = getNotNullConditions(cluster, + rexBuilder, leftInput, joinLeftKeyPositions); + final Map newRightConditions = getNotNullConditions(cluster, + rexBuilder, rightInput, joinRightKeyPositions); + + // Nothing will be added to the expression + if (newLeftConditions == null && newRightConditions == null) { + return; + } + + if (newLeftConditions != null) { + if (leftInput instanceof HiveFilter) { + leftInput = leftInput.getInput(0); + } + leftInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, + leftInput, newLeftConditions.values()); + } + if (newRightConditions != null) { + if (rightInput instanceof HiveFilter) { + rightInput = rightInput.getInput(0); + } + rightInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, + rightInput, newRightConditions.values()); + } + + Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), + leftInput, rightInput, join.getJoinType(), join.isSemiJoinDone()); + + call.getPlanner().onCopy(join, newJoin); + + call.transformTo(newJoin); + } + + private static Map getNotNullConditions(RelOptCluster cluster, + RexBuilder rexBuilder, RelNode input, Set inputKeyPositions) { + + boolean added = false; + + final RelDataType returnType = cluster.getTypeFactory(). + createSqlType(SqlTypeName.BOOLEAN); + + final Map newConditions; + if (input instanceof HiveFilter) { + newConditions = splitCondition(((HiveFilter) input).getCondition()); + } + else { + newConditions = new HashMap(); + } + for (int pos : inputKeyPositions) { + try { + RelDataType keyType = input.getRowType().getFieldList().get(pos).getType(); + // Nothing to do if key cannot be null + if (!keyType.isNullable()) { + continue; + } + SqlOperator funcCall = SqlFunctionConverter.getCalciteOperator(NOT_NULL_FUNC_NAME, + FunctionRegistry.getFunctionInfo(NOT_NULL_FUNC_NAME).getGenericUDF(), + ImmutableList.of(keyType), returnType); + RexNode cond = rexBuilder.makeCall(funcCall, rexBuilder.makeInputRef(input, pos)); + String digest = cond.toString(); + if (!newConditions.containsKey(digest)) { + newConditions.put(digest,cond); + added = true; + } + } catch (SemanticException e) { + throw new AssertionError(e.getMessage()); + } + } + // Nothing will be added to the expression + if (!added) { + return null; + } + return newConditions; + } + + private static Map splitCondition(RexNode condition) { + Map newConditions = new HashMap(); + if (condition.getKind() == SqlKind.AND) { + for (RexNode node : ((RexCall) condition).getOperands()) { + newConditions.put(node.toString(), node); + } + } + else { + newConditions.put(condition.toString(), condition); + } + return newConditions; + } + + private static RelNode createHiveFilterConjunctiveCondition(FilterFactory filterFactory, + RexBuilder rexBuilder, RelNode input, Collection conditions) { + final RexNode newCondition = RexUtil.composeConjunction(rexBuilder, conditions, false); + return filterFactory.createFilter(input, newCondition); + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java (working copy) @@ -28,8 +28,10 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; @@ -50,13 +52,18 @@ import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.apache.calcite.util.Util; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableMap.Builder; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; @@ -319,11 +326,11 @@ return this.mapOfProjIndxInJoinSchemaToLeafPInfo; } - public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoin j) { + public static JoinPredicateInfo constructJoinPredicateInfo(Join j) { return constructJoinPredicateInfo(j, j.getCondition()); } - public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoin j, RexNode predicate) { + public static JoinPredicateInfo constructJoinPredicateInfo(Join j, RexNode predicate) { JoinPredicateInfo jpi = null; JoinLeafPredicateInfo jlpi = null; List equiLPIList = new ArrayList(); @@ -432,6 +439,16 @@ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema); } + public List getJoinKeyExprs(int input) { + if (input == 0) { + return this.joinKeyExprsFromLeft; + } + if (input == 1) { + return this.joinKeyExprsFromRight; + } + return null; + } + public List getJoinKeyExprsFromLeft() { return this.joinKeyExprsFromLeft; } @@ -461,7 +478,7 @@ return this.projsFromRightPartOfJoinKeysInJoinSchema; } - private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoin j, RexNode pe) { + private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(Join j, RexNode pe) { JoinLeafPredicateInfo jlpi = null; List filterNulls = new ArrayList(); List joinKeyExprsFromLeft = new ArrayList(); @@ -561,6 +578,107 @@ return deterministic; } + public static ImmutableMap getColInfoMap(List hiveCols, + int startIndx) { + Builder bldr = ImmutableMap. builder(); + + int indx = startIndx; + for (T ci : hiveCols) { + bldr.put(indx, ci); + indx++; + } + + return bldr.build(); + } + + public static ImmutableMap shiftVColsMap(Map hiveVCols, + int shift) { + Builder bldr = ImmutableMap. 
builder(); + + for (Integer pos : hiveVCols.keySet()) { + bldr.put(shift + pos, hiveVCols.get(pos)); + } + + return bldr.build(); + } + + public static ImmutableMap getVColsMap(List hiveVCols, + int startIndx) { + Builder bldr = ImmutableMap. builder(); + + int indx = startIndx; + for (VirtualColumn vc : hiveVCols) { + bldr.put(indx, vc); + indx++; + } + + return bldr.build(); + } + + public static ImmutableMap getColNameIndxMap(List tableFields) { + Builder bldr = ImmutableMap. builder(); + + int indx = 0; + for (FieldSchema fs : tableFields) { + bldr.put(fs.getName(), indx); + indx++; + } + + return bldr.build(); + } + + public static ImmutableMap getRowColNameIndxMap(List rowFields) { + Builder bldr = ImmutableMap. builder(); + + int indx = 0; + for (RelDataTypeField rdt : rowFields) { + bldr.put(rdt.getName(), indx); + indx++; + } + + return bldr.build(); + } + + public static ImmutableList getInputRef(List inputRefs, RelNode inputRel) { + ImmutableList.Builder bldr = ImmutableList. builder(); + for (int i : inputRefs) { + bldr.add(new RexInputRef(i, (RelDataType) inputRel.getRowType().getFieldList().get(i).getType())); + } + return bldr.build(); + } + + public static ExprNodeDesc getExprNode(Integer inputRefIndx, RelNode inputRel, + ExprNodeConverter exprConv) { + ExprNodeDesc exprNode = null; + RexNode rexInputRef = new RexInputRef(inputRefIndx, (RelDataType) inputRel.getRowType() + .getFieldList().get(inputRefIndx).getType()); + exprNode = rexInputRef.accept(exprConv); + + return exprNode; + } + + public static List getExprNodes(List inputRefs, RelNode inputRel, + String inputTabAlias) { + List exprNodes = new ArrayList(); + List rexInputRefs = getInputRef(inputRefs, inputRel); + // TODO: Change ExprNodeConverter to be independent of Partition Expr + ExprNodeConverter exprConv = new ExprNodeConverter(inputTabAlias, inputRel.getRowType(), false, inputRel.getCluster().getTypeFactory()); + for (RexNode iRef : rexInputRefs) { + exprNodes.add(iRef.accept(exprConv)); + } + return exprNodes; + } + + public static List getFieldNames(List inputRefs, RelNode inputRel) { + List fieldNames = new ArrayList(); + List schemaNames = inputRel.getRowType().getFieldNames(); + for (Integer iRef : inputRefs) { + fieldNames.add(schemaNames.get(iRef)); + } + + return fieldNames; + } + /** * Walks over an expression and determines whether it is constant. */ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java (revision 1673601) @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdCollation; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdCollation { + + public static final RelMetadataProvider SOURCE = + ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLLATIONS.method, new HiveRelMdCollation()), + RelMdCollation.SOURCE)); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdCollation() {} + + //~ Methods ---------------------------------------------------------------- + + public ImmutableList collations(HiveAggregate aggregate) { + // Compute collations + ImmutableList.Builder collationListBuilder = + new ImmutableList.Builder(); + for (int pos : aggregate.getGroupSet().asList()) { + final RelFieldCollation fieldCollation = new RelFieldCollation(pos); + collationListBuilder.add(fieldCollation); + } + // Return aggregate collations + return ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(collationListBuilder.build()))); + } + + public ImmutableList collations(HiveJoin join) { + return join.getCollation(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java (revision 1673601) @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdParallelism; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +public class HiveRelMdParallelism extends RelMdParallelism { + + private final Double maxSplitSize; + + //~ Constructors ----------------------------------------------------------- + + public HiveRelMdParallelism(Double maxSplitSize) { + this.maxSplitSize = maxSplitSize; + } + + public RelMetadataProvider getMetadataProvider() { + return ReflectiveRelMetadataProvider.reflectiveSource(this, + BuiltInMethod.IS_PHASE_TRANSITION.method, + BuiltInMethod.SPLIT_COUNT.method); + } + + //~ Methods ---------------------------------------------------------------- + + public Boolean isPhaseTransition(HiveJoin join) { + return join.isPhaseTransition(); + } + + public Boolean isPhaseTransition(HiveSort sort) { + // As the Exchange operator is introduced later on, we make a + // Sort operator create a new stage for the moment + return true; + } + + public Integer splitCount(HiveJoin join) { + return join.getSplitCount(); + } + + public Integer splitCount(HiveTableScan scan) { + RelOptHiveTable table = (RelOptHiveTable) scan.getTable(); + return table.getHiveTableMD().getNumBuckets(); + } + + public Integer splitCount(RelNode rel) { + Boolean newPhase = RelMetadataQuery.isPhaseTransition(rel); + + if (newPhase == null) { + return null; + } + + if (newPhase) { + // We repartition: new number of splits + return splitCountRepartition(rel); + } + + // We do not repartition: take number of splits from children + Integer splitCount = 0; + for (RelNode input : rel.getInputs()) { + splitCount += RelMetadataQuery.splitCount(input); + } + return splitCount; + } + + public Integer splitCountRepartition(RelNode rel) { + // We repartition: new number of splits + final Double averageRowSize = RelMetadataQuery.getAverageRowSize(rel); + final Double rowCount = RelMetadataQuery.getRowCount(rel); + if (averageRowSize == null || rowCount == null) { + return null; + } + final Double totalSize = averageRowSize * rowCount; + final Double splitCount = totalSize / maxSplitSize; + return splitCount.intValue(); + } + +} + +// End HiveRelMdParallelism.java \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java (revision 1673601) @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdMemory; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; + +public class HiveRelMdMemory extends RelMdMemory { + + private static final HiveRelMdMemory INSTANCE = new HiveRelMdMemory(); + + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource(INSTANCE, + BuiltInMethod.MEMORY.method, + BuiltInMethod.CUMULATIVE_MEMORY_WITHIN_PHASE.method, + BuiltInMethod.CUMULATIVE_MEMORY_WITHIN_PHASE_SPLIT.method); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdMemory() {} + + //~ Methods ---------------------------------------------------------------- + + public Double memory(HiveTableScan tableScan) { + return 0.0d; + } + + public Double memory(HiveAggregate aggregate) { + final Double avgRowSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); + final Double rowCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + if (avgRowSize == null || rowCount == null) { + return null; + } + return avgRowSize * rowCount; + } + + public Double memory(HiveFilter filter) { + return 0.0; + } + + public Double memory(HiveJoin join) { + return join.getMemory(); + } + + public Double cumulativeMemoryWithinPhaseSplit(HiveJoin join) { + return join.getCumulativeMemoryWithinPhaseSplit(); + } + + public Double memory(HiveLimit limit) { + return 0.0; + } + + public Double memory(HiveProject project) { + return 0.0; + } + + public Double memory(HiveSort sort) { + if (sort.getCollation() != RelCollations.EMPTY) { + // It sorts + final Double avgRowSize = RelMetadataQuery.getAverageRowSize(sort.getInput()); + final Double rowCount = RelMetadataQuery.getRowCount(sort.getInput()); + if (avgRowSize == null || rowCount == null) { + return null; + } + return avgRowSize * rowCount; + } + // It does not sort, memory footprint is zero + return 0.0; + } + + public Double memory(HiveUnion union) { + return 0.0; + } + +} Index: 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java (revision 1673601) @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.List; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdSize; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableNullableList; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdSize extends RelMdSize { + + private static final HiveRelMdSize INSTANCE = new HiveRelMdSize(); + + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource(INSTANCE, + BuiltInMethod.AVERAGE_COLUMN_SIZES.method, + BuiltInMethod.AVERAGE_ROW_SIZE.method); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdSize() {} + + //~ Methods ---------------------------------------------------------------- + + public List averageColumnSizes(HiveTableScan scan) { + List neededcolsLst = scan.getNeededColIndxsFrmReloptHT(); + List columnStatistics = ((RelOptHiveTable) scan.getTable()) + .getColStat(neededcolsLst, true); + + // Obtain list of col stats, or use default if they are not available + final ImmutableList.Builder list = ImmutableList.builder(); + int indxRqdCol = 0; + int nFields = scan.getRowType().getFieldCount(); + for (int i = 0; i < nFields; i++) { + if (neededcolsLst.contains(i)) { + ColStatistics columnStatistic = columnStatistics.get(indxRqdCol); + indxRqdCol++; + if (columnStatistic == null) { + RelDataTypeField field = scan.getRowType().getFieldList().get(i); + list.add(averageTypeValueSize(field.getType())); + } else { + list.add(columnStatistic.getAvgColLen()); + } + } else { + list.add(new Double(0)); + } + } + + return list.build(); + } + + public List 
averageColumnSizes(HiveJoin rel) { + final RelNode left = rel.getLeft(); + final RelNode right = rel.getRight(); + final List lefts = + RelMetadataQuery.getAverageColumnSizes(left); + List rights = null; + if (!rel.isLeftSemiJoin()) { + rights = RelMetadataQuery.getAverageColumnSizes(right); + } + if (lefts == null && rights == null) { + return null; + } + final int fieldCount = rel.getRowType().getFieldCount(); + Double[] sizes = new Double[fieldCount]; + if (lefts != null) { + lefts.toArray(sizes); + } + if (rights != null) { + final int leftCount = left.getRowType().getFieldCount(); + for (int i = 0; i < rights.size(); i++) { + sizes[leftCount + i] = rights.get(i); + } + } + return ImmutableNullableList.copyOf(sizes); + } + + // TODO: remove when the averageTypeValueSize method in RelMdSize + // supports all types + public Double averageTypeValueSize(RelDataType type) { + switch (type.getSqlTypeName()) { + case BOOLEAN: + case TINYINT: + return 1d; + case SMALLINT: + return 2d; + case INTEGER: + case FLOAT: + case REAL: + case DECIMAL: + case DATE: + case TIME: + return 4d; + case BIGINT: + case DOUBLE: + case TIMESTAMP: + case INTERVAL_DAY_TIME: + case INTERVAL_YEAR_MONTH: + return 8d; + case BINARY: + return (double) type.getPrecision(); + case VARBINARY: + return Math.min((double) type.getPrecision(), 100d); + case CHAR: + return (double) type.getPrecision() * BYTES_PER_CHARACTER; + case VARCHAR: + // Even in large (say VARCHAR(2000)) columns most strings are small + return Math.min((double) type.getPrecision() * BYTES_PER_CHARACTER, 100d); + case ROW: + Double average = 0.0; + for (RelDataTypeField field : type.getFieldList()) { + average += averageTypeValueSize(field.getType()); + } + return average; + default: + return null; + } + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java (revision 1673601) @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdDistribution; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdDistribution { + + public static final RelMetadataProvider SOURCE = + ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()), + RelMdDistribution.SOURCE)); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdDistribution() {} + + //~ Methods ---------------------------------------------------------------- + + public RelDistribution distribution(HiveAggregate aggregate) { + return new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, + aggregate.getGroupSet().asList()); + } + + public RelDistribution distribution(HiveJoin join) { + return join.getDistribution(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java (working copy) @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import java.util.BitSet; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java (working copy) @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import java.util.ArrayList; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java (working copy) @@ -20,21 +20,62 @@ import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys; import com.google.common.collect.ImmutableList; public class HiveDefaultRelMetadataProvider { - private HiveDefaultRelMetadataProvider() { + + private final HiveConf hiveConf; + + + public HiveDefaultRelMetadataProvider(HiveConf hiveConf) { + this.hiveConf = hiveConf; } - public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList - .of(HiveRelMdDistinctRowCount.SOURCE, + public RelMetadataProvider getMetadataProvider() { + + // Create cost metadata provider + final HiveCostModel cm; + if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") + && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.EXTENDED_COST_MODEL)) { + cm = HiveOnTezCostModel.INSTANCE; + } else { + cm = HiveDefaultCostModel.INSTANCE; + } + + // Get max split size for HiveRelMdParallelism + final Double maxSplitSize = (double) HiveConf.getLongVar( + this.hiveConf, + HiveConf.ConfVars.MAPREDMAXSPLITSIZE); + + // Return MD provider + return ChainedRelMetadataProvider.of(ImmutableList + .of( + HiveRelMdDistinctRowCount.SOURCE, + new HiveRelMdCost(cm).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, HiveRelMdRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, + HiveRelMdSize.SOURCE, + HiveRelMdMemory.SOURCE, + new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), + HiveRelMdDistribution.SOURCE, + HiveRelMdCollation.SOURCE, new DefaultRelMetadataProvider())); } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy) @@ -142,7 +142,9 @@ if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) { transformations.add(new ReduceSinkDeDuplication()); } + if(!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { transformations.add(new NonBlockingOpDeDupProc()); + } if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)) { transformations.add(new IdentityProjectRemover()); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (working copy) @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Set; /** @@ -102,6 +103,14 @@ return tableNames; } + public List getColumnNames() { + List columnNames = new ArrayList(); + for (ColumnInfo var : this.signature) { + columnNames.add(var.getInternalName()); + } + return columnNames; + } + @Override public boolean equals(Object obj) { if (!(obj instanceof RowSchema) || (obj == null)) { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -229,9 +229,9 @@ private HashMap opToPartPruner; private HashMap opToPartList; - private HashMap> topOps; - private final HashMap> topSelOps; - private final LinkedHashMap, OpParseContext> opParseCtx; + protected HashMap> topOps; + private HashMap> topSelOps; + protected LinkedHashMap, OpParseContext> opParseCtx; private List loadTableWork; private List loadFileWork; private final Map joinContext; @@ -258,7 +258,7 @@ private CreateViewDesc createVwDesc; private ArrayList viewsExpanded; private ASTNode viewSelect; - private final UnparseTranslator unparseTranslator; + protected final UnparseTranslator unparseTranslator; private final GlobalLimitCtx globalLimitCtx; // prefix for column names auto generated by hive @@ -478,7 +478,7 @@ wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) { continue; } - wFnSpec.setAlias("_wcol" + wColIdx); + wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx); spec.addWindowFunction(wFnSpec); qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression()); } @@ -3448,7 +3448,7 @@ return ret; } - private int setBit(int bitmap, int bitIdx) { + public static int setBit(int bitmap, int bitIdx) { return bitmap | (1 << bitIdx); } @@ -3984,10 +3984,10 @@ /** * Class to store GenericUDAF related information. */ - static class GenericUDAFInfo { - ArrayList convertedParameters; - GenericUDAFEvaluator genericUDAFEvaluator; - TypeInfo returnType; + public static class GenericUDAFInfo { + public ArrayList convertedParameters; + public GenericUDAFEvaluator genericUDAFEvaluator; + public TypeInfo returnType; } /** @@ -4028,7 +4028,7 @@ * Returns the GenericUDAFEvaluator for the aggregation. This is called once * for each GroupBy aggregation. 
*/ - static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, + public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, ArrayList aggParameters, ASTNode aggTree, boolean isDistinct, boolean isAllColumns) throws SemanticException { @@ -4058,7 +4058,7 @@ * @throws SemanticException * when the UDAF is not found or has problems. */ - static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, + public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, GenericUDAFEvaluator.Mode emode, ArrayList aggParameters) throws SemanticException { @@ -4087,7 +4087,7 @@ return r; } - static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( + public static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( GroupByDesc.Mode mode, boolean isDistinct) { switch (mode) { case COMPLETE: @@ -4130,7 +4130,7 @@ * @return the ExprNodeDesc of the constant parameter if the given internalName represents * a constant parameter; otherwise, return null */ - private ExprNodeDesc isConstantParameterInAggregationParameters(String internalName, + public static ExprNodeDesc isConstantParameterInAggregationParameters(String internalName, List reduceValues) { // only the pattern of "VALUE._col([0-9]+)" should be handled. @@ -5577,7 +5577,7 @@ return false; } - private void checkExpressionsForGroupingSet(List grpByExprs, + void checkExpressionsForGroupingSet(List grpByExprs, List distinctGrpByExprs, Map aggregationTrees, RowResolver inputRowResolver) throws SemanticException { @@ -6131,7 +6131,7 @@ } @SuppressWarnings("nls") - private Operator genFileSinkPlan(String dest, QB qb, Operator input) + protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRowResolver(); @@ -9234,7 +9234,7 @@ return equalsExpr; } - private String getAliasId(String alias, QB qb) { + protected String getAliasId(String alias, QB qb) { return (qb.getId() == null ? alias : qb.getId() + ":" + alias).toLowerCase(); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (working copy) @@ -38,7 +38,7 @@ * SemanticAnalyzer.saveViewDefinition() calls TokenRewriteStream.toString(). 
* */ -class UnparseTranslator { +public class UnparseTranslator { // key is token start index private final NavigableMap translations; private final List copyTranslations; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (working copy) @@ -49,8 +49,8 @@ import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelCollations; -import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; @@ -58,8 +58,10 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -116,12 +118,14 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; @@ -135,8 +139,11 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; import 
@@ -175,6 +182,7 @@
 import com.google.common.collect.Lists;

 public class CalcitePlanner extends SemanticAnalyzer {
+  private final AtomicInteger noColsMissingStats = new AtomicInteger(0);
   private List topLevelFieldSchema;
   private SemanticException semanticException;
@@ -218,13 +226,16 @@
         if (cboCtx.type == PreCboCtx.Type.CTAS) {
           queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query
         }
-        runCBO = canHandleAstForCbo(queryForCbo, getQB(), cboCtx);
+        runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx);
         if (runCBO) {
           disableJoinMerge = true;
           boolean reAnalyzeAST = false;

           try {
+            if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
+              sinkOp = getOptimizedHiveOPDag();
+            } else {
             // 1. Gen Optimized AST
             ASTNode newAST = getOptimizedAST();
@@ -252,6 +263,7 @@
             LOG.info("CBO Succeeded; optimized logical plan.");
             this.ctx.setCboInfo("Plan optimized by CBO.");
             LOG.debug(newAST.dump());
+            }
           } catch (Exception e) {
             boolean isMissingStats = noColsMissingStats.get() > 0;
             if (isMissingStats) {
@@ -324,7 +336,7 @@
    * If top level QB is query then everything below it must also be
    * Query.
    */
-  boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) {
+  boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
     int root = ast.getToken().getType();
     boolean needToLogMessage = STATIC_LOG.isInfoEnabled();
     boolean isSupportedRoot = root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN
@@ -598,6 +610,57 @@
     return optiqOptimizedAST;
   }

+  /**
+   * Get Optimized Hive Operator DAG for the given QB tree in the semAnalyzer.
+   *
+   * @return Optimized Hive operator tree
+   * @throws SemanticException
+   */
+  Operator getOptimizedHiveOPDag() throws SemanticException {
+    RelNode optimizedOptiqPlan = null;
+    CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions);
+
+    try {
+      optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
+          .newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build());
+    } catch (Exception e) {
+      rethrowCalciteException(e);
+      throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage());
+    }
+
+    RelNode modifiedOptimizedOptiqPlan = introduceProjectIfNeeded(optimizedOptiqPlan);
+
+    Operator hiveRoot = new HiveOpConverter(this, conf, unparseTranslator, topOps,
+        conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(modifiedOptimizedOptiqPlan);
+    RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB());
+    opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR));
+    return genFileSinkPlan(getQB().getParseInfo().getClauseNames().iterator().next(), getQB(), hiveRoot);
+  }
+
+  private RelNode introduceProjectIfNeeded(RelNode optimizedOptiqPlan)
+      throws CalciteSemanticException {
+    RelNode parent = null;
+    RelNode input = optimizedOptiqPlan;
+    RelNode newRoot = optimizedOptiqPlan;
+
+    while (!(input instanceof Project) && (input instanceof Sort)) {
+      parent = input;
+      input = input.getInput(0);
+    }
+
+    if (!(input instanceof Project)) {
+      HiveProject hpRel = HiveProject.create(input,
+          HiveCalciteUtil.getProjsFromBelowAsInputRef(input), input.getRowType().getFieldNames());
+      if (input == optimizedOptiqPlan) {
+        newRoot = hpRel;
+      } else {
+        parent.replaceInput(0, hpRel);
+      }
+    }
+
+    return newRoot;
+  }
+
   /***
    * Unwraps Calcite Invocation exceptions coming meta data provider chain and
    * obtains the real cause.
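getOptimizedHiveOPDag is the heart of the new return path: instead of unparsing the optimized Calcite plan back to an ASTNode and re-analyzing it, the plan is handed to HiveOpConverter and only the final FileSink is attached through the classic genFileSinkPlan. introduceProjectIfNeeded guarantees the converter sees a Project at the root (possibly below a chain of Sorts), so the output columns are explicit. Whether the path is taken is driven by the new flag; a minimal usage sketch (hypothetical RetPathToggle class, assuming only HiveConf and the new ConfVars entry from this patch):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class RetPathToggle {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // opt in to the Calcite -> Hive-operator return path added in this patch;
        // setting it to false keeps the previous AST round trip
        conf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP, true);
        System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP));
      }
    }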
@@ -674,6 +737,24 @@
         || t instanceof UndeclaredThrowableException;
   }

+  private RowResolver genRowResolver(Operator op, QB qb) {
+    RowResolver rr = new RowResolver();
+    String subqAlias = (qb.getAliases().size() == 1 && qb.getSubqAliases().size() == 1) ? qb
+        .getAliases().get(0) : null;
+
+    for (ColumnInfo ci : op.getSchema().getSignature()) {
+      try {
+        rr.putWithCheck((subqAlias != null) ? subqAlias : ci.getTabAlias(),
+            ci.getAlias() != null ? ci.getAlias() : ci.getInternalName(), ci.getInternalName(),
+            new ColumnInfo(ci));
+      } catch (SemanticException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    return rr;
+  }
+
   /**
    * Code responsible for Calcite plan generation and optimization.
    */
@@ -700,7 +781,13 @@
       /*
        * recreate cluster, so that it picks up the additional traitDef
        */
-      RelOptPlanner planner = HiveVolcanoPlanner.createPlanner();
+      final Double maxSplitSize = (double) HiveConf.getLongVar(
+          conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
+      final Double maxMemory = (double) HiveConf.getLongVar(
+          conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
+      HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
+      HiveConfigContext confContext = new HiveConfigContext(algorithmsConf);
+      RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
       final RelOptQuery query = new RelOptQuery(planner);
       final RexBuilder rexBuilder = cluster.getRexBuilder();
       cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder);
@@ -719,13 +806,16 @@
         throw new RuntimeException(e);
       }

+      // Create MD provider
+      HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
+
       // 2. Apply Pre Join Order optimizations
       calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan,
-          HiveDefaultRelMetadataProvider.INSTANCE);
+          mdProvider.getMetadataProvider());

       // 3. Appy Join Order Optimizations using Hep Planner (MST Algorithm)
       List list = Lists.newArrayList();
-      list.add(HiveDefaultRelMetadataProvider.INSTANCE);
+      list.add(mdProvider.getMetadataProvider());

       RelTraitSet desiredTraits = cluster
           .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY);
@@ -758,6 +848,18 @@

       calciteOptimizedPlan = hepPlanner.findBestExp();

+      if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
+        // run rules to aid in translation from Optiq tree -> Hive tree
+        hepPgm = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP)
+            .addRuleInstance(new HiveInsertExchange4JoinRule()).build();
+        hepPlanner = new HepPlanner(hepPgm);
+
+        hepPlanner.registerMetadataProviders(list);
+        cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
+        hepPlanner.setRoot(calciteOptimizedPlan);
+        calciteOptimizedPlan = hepPlanner.findBestExp();
+      }
+
       if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
         LOG.debug("CBO Planning details:\n");
         LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan));
@@ -789,7 +891,12 @@
       basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE,
           SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);

-      // 2. PPD
+      // 2. Add not null filters
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
+        basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE);
+      }
+
+      // 3. PPD
       basePlan = hepPlan(basePlan, true, mdProvider,
           ReduceExpressionsRule.PROJECT_INSTANCE,
           ReduceExpressionsRule.FILTER_INSTANCE,
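The block guarded by HIVE_CBO_RETPATH_HIVEOP above runs one extra HepPlanner pass whose only job is to reshape the tree for translation (inserting exchange operators around joins). The generic shape of such a pass, sketched against plain Calcite APIs with a hypothetical HepPass holder class and caller-supplied rules:

    import org.apache.calcite.plan.RelOptRule;
    import org.apache.calcite.plan.hep.HepMatchOrder;
    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;

    public final class HepPass {
      private HepPass() {}

      /** Apply the given rules bottom-up until a fixpoint is reached. */
      public static RelNode run(RelNode plan, RelOptRule... rules) {
        HepProgramBuilder builder = new HepProgramBuilder()
            .addMatchOrder(HepMatchOrder.BOTTOM_UP);   // visit inputs before parents
        for (RelOptRule rule : rules) {
          builder.addRuleInstance(rule);
        }
        HepPlanner planner = new HepPlanner(builder.build());
        planner.setRoot(plan);
        return planner.findBestExp();                  // rewritten plan
      }
    }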
@@ -802,19 +909,19 @@
           HiveFilterJoinRule.FILTER_ON_JOIN,
           new FilterAggregateTransposeRule(Filter.class,
               HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class));

-      // 3. Transitive inference & Partition Pruning
+      // 4. Transitive inference & Partition Pruning
       basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule(
           Join.class, HiveFilter.DEFAULT_FILTER_FACTORY),
           new HivePartitionPruneRule(conf));

-      // 4. Projection Pruning
+      // 5. Projection Pruning
       RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY,
           HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
           RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY,
           HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
       basePlan = fieldTrimmer.trim(basePlan);

-      // 5. Rerun PPD through Project as column pruning would have introduced DT
+      // 6. Rerun PPD through Project as column pruning would have introduced DT
       // above scans
       basePlan = hepPlan(basePlan, true, mdProvider,
           new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
@@ -1186,7 +1293,7 @@
       }

       // 2. Get Table Metadata
-      Table tab = qb.getMetaData().getSrcForAlias(tableAlias);
+      Table tabMetaData = qb.getMetaData().getSrcForAlias(tableAlias);

       // 3. Get Table Logical Schema (Row Type)
       // NOTE: Table logical schema = Non Partition Cols + Partition Cols +
@@ -1194,7 +1301,7 @@

       // 3.1 Add Column info for non partion cols (Object Inspector fields)
       @SuppressWarnings("deprecation")
-      StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer()
+      StructObjectInspector rowObjectInspector = (StructObjectInspector) tabMetaData.getDeserializer()
           .getObjectInspector();
       List fields = rowObjectInspector.getAllStructFieldRefs();
       ColumnInfo colInfo;
@@ -1216,7 +1323,7 @@
       ArrayList partitionColumns = new ArrayList();

       // 3.2 Add column info corresponding to partition columns
-      for (FieldSchema part_col : tab.getPartCols()) {
+      for (FieldSchema part_col : tabMetaData.getPartCols()) {
         colName = part_col.getName();
         colInfo = new ColumnInfo(colName,
             TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true);
@@ -1226,6 +1333,7 @@
       }

       // 3.3 Add column info corresponding to virtual columns
+      List virtualCols = new ArrayList();
       Iterator vcs = VirtualColumn.getRegistry(conf).iterator();
       while (vcs.hasNext()) {
         VirtualColumn vc = vcs.next();
@@ -1233,24 +1341,26 @@
             vc.getIsHidden());
         rr.put(tableAlias, vc.getName(), colInfo);
         cInfoLst.add(colInfo);
+        virtualCols.add(vc);
       }

       // 3.4 Build row type from field
       RelDataType rowType = TypeConverter.getType(cluster, rr, null);

       // 4. Build RelOptAbstractTable
-      String fullyQualifiedTabName = tab.getDbName();
-      if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty())
-        fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName();
-      else
-        fullyQualifiedTabName = tab.getTableName();
+      String fullyQualifiedTabName = tabMetaData.getDbName();
+      if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) {
+        fullyQualifiedTabName = fullyQualifiedTabName + "." + tabMetaData.getTableName();
+      }
+      else {
+        fullyQualifiedTabName = tabMetaData.getTableName();
+      }
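Besides the tab -> tabMetaData rename, this hunk starts collecting the virtual columns so they can be handed to RelOptHiveTable, and normalizes the db-qualified table name. The qualification logic, restated as a tiny stand-alone helper (hypothetical TableNames class; plain strings instead of Hive's Table object):

    public final class TableNames {
      private TableNames() {}

      /** Prefix the table with its database when one is known, e.g. "default.src". */
      public static String fullyQualified(String dbName, String tableName) {
        if (dbName != null && !dbName.isEmpty()) {
          return dbName + "." + tableName;
        }
        return tableName;   // no database recorded; use the bare table name
      }
    }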

       RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName,
-          tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache,
-          noColsMissingStats);
+          rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
+          partitionCache, noColsMissingStats, getAliasId(tableAlias, qb));

       // 5. Build Hive Table Scan Rel
-      tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
-          rowType);
+      tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
+          null == tableAlias ? tabMetaData.getTableName() : tableAlias);

       // 6. Add Schema(RR) to RelNode-Schema map
       ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr,
@@ -1768,23 +1878,53 @@
           qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr));
         }
       }
+
      List grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName);
      HashMap aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
      boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false;
      boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true : false;

+      final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
+          || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
+
+      // 2. Sanity check
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)
+          && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) {
+        throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg());
+      }
+      if (cubeRollupGrpSetPresent) {
+        if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
+          throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg());
+        }
+
+        if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+          checkExpressionsForGroupingSet(grpByAstExprs, qb.getParseInfo()
+              .getDistinctFuncExprsForClause(detsClauseName), aggregationTrees,
+              this.relToHiveRR.get(srcRel));
+
+          if (qbp.getDestGroupingSets().size() > conf
+              .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY)) {
+            String errorMsg = "The number of rows per input row due to grouping sets is "
+                + qbp.getDestGroupingSets().size();
+            throw new SemanticException(
+                ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
+          }
+        }
+      }
+
     if (hasGrpByAstExprs || hasAggregationTrees) {
       ArrayList gbExprNDescLst = new ArrayList();
       ArrayList outputColumnNames = new ArrayList();

-      // 2. Input, Output Row Resolvers
+      // 3. Input, Output Row Resolvers
       RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel);
       RowResolver groupByOutputRowResolver = new RowResolver();
       groupByOutputRowResolver.setIsExprResolver(true);

       if (hasGrpByAstExprs) {
-        // 3. Construct GB Keys (ExprNode)
+        // 4. Construct GB Keys (ExprNode)
         for (int i = 0; i < grpByAstExprs.size(); ++i) {
           ASTNode grpbyExpr = grpByAstExprs.get(i);
           Map astToExprNDescMap = TypeCheckProcFactory.genExprNode(
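The added "2. Sanity check" block mirrors guards the classic SemanticAnalyzer path already enforces: skewed group-by with more than one DISTINCT is rejected, grouping sets demand map-side aggregation, and under skew the grouping-set fan-out must stay below hive.new.job.grouping.set.cardinality. The same guards condensed into a stand-alone sketch (hypothetical GroupingSetGuards class and signature; the real code reads HiveConf/QBParseInfo and throws SemanticException):

    public final class GroupingSetGuards {
      private GroupingSetGuards() {}

      public static void check(boolean skewedGroupBy, boolean mapSideAggr,
          int distinctExprs, int groupingSetSize, int cardinalityThreshold) {
        if (skewedGroupBy && distinctExprs > 1) {
          throw new IllegalArgumentException("multiple DISTINCTs unsupported with skewed group-by");
        }
        if (groupingSetSize > 0) {               // rollup / cube / grouping sets present
          if (!mapSideAggr) {
            throw new IllegalArgumentException("grouping sets require map-side aggregation");
          }
          if (skewedGroupBy && groupingSetSize > cardinalityThreshold) {
            throw new IllegalArgumentException("rows per input row due to grouping sets is "
                + groupingSetSize + ", above the configured threshold " + cardinalityThreshold);
          }
        }
      }
    }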
@@ -1799,12 +1939,10 @@
           }
         }

-      // 4. GroupingSets, Cube, Rollup
+      // 5. GroupingSets, Cube, Rollup
       int groupingColsSize = gbExprNDescLst.size();
       List groupingSets = null;
-      if (!qbp.getDestRollups().isEmpty()
-          || !qbp.getDestGroupingSets().isEmpty()
-          || !qbp.getDestCubes().isEmpty()) {
+      if (cubeRollupGrpSetPresent) {
         if (qbp.getDestRollups().contains(detsClauseName)) {
           groupingSets = getGroupingSetsForRollup(grpByAstExprs.size());
         } else if (qbp.getDestCubes().contains(detsClauseName)) {
@@ -1827,18 +1965,18 @@
         }
       }

-      // 5. Construct aggregation function Info
+      // 6. Construct aggregation function Info
       ArrayList aggregations = new ArrayList();
       if (hasAggregationTrees) {
         assert (aggregationTrees != null);
         for (ASTNode value : aggregationTrees.values()) {
-          // 5.1 Determine type of UDAF
+          // 6.1 Determine type of UDAF
           // This is the GenericUDAF name
           String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText());
           boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
           boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;

-          // 5.2 Convert UDAF Params to ExprNodeDesc
+          // 6.2 Convert UDAF Params to ExprNodeDesc
           ArrayList aggParameters = new ArrayList();
           for (int i = 1; i < value.getChildCount(); i++) {
             ASTNode paraExpr = (ASTNode) value.getChild(i);
@@ -1862,7 +2000,7 @@
         }
       }

-      // 6. If GroupingSets, Cube, Rollup were used, we account grouping__id
+      // 7. If GroupingSets, Cube, Rollup were used, we account grouping__id
       if(groupingSets != null && !groupingSets.isEmpty()) {
         String field = getColumnInternalName(groupingColsSize + aggregations.size());
         outputColumnNames.add(field);
@@ -1874,7 +2012,7 @@
             true));
       }

-      // 7. We create the group_by operator
+      // 8. We create the group_by operator
       gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel);
       relToHiveColNameCalcitePosMap.put(gbRel,
           buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel));
@@ -2250,15 +2388,27 @@
       }
     }

-    return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
+    return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel, windowExpressions);
   }

   private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch,
       RelNode srcRel) throws CalciteSemanticException {
+    return genSelectRelNode(calciteColLst, out_rwsch, srcRel, null);
+  }
+
+  private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch,
+      RelNode srcRel, List windowExpressions) throws CalciteSemanticException {
     // 1. Build Column Names
     Set colNamesSet = new HashSet();
     List cInfoLst = out_rwsch.getRowSchema().getSignature();
     ArrayList columnNames = new ArrayList();
+    Map windowToAlias = null;
+    if (windowExpressions != null ) {
+      windowToAlias = new HashMap();
+      for (WindowExpressionSpec wes : windowExpressions) {
+        windowToAlias.put(wes.getExpression().toStringTree().toLowerCase(), wes.getAlias());
+      }
+    }
     String[] qualifiedColNames;
     String tmpColAlias;
     for (int i = 0; i < calciteColLst.size(); i++) {
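The new genSelectRelNode overload threads the window expressions through so their generated aliases survive the Calcite round trip: keys in windowToAlias are the lower-cased toStringTree() dump of each window expression, values are the aliases assigned during windowing translation. The alias lookup in isolation, as a hypothetical WindowAliases helper (illustrative key/alias strings; real keys are full AST dumps):

    import java.util.HashMap;
    import java.util.Map;

    public final class WindowAliases {
      private WindowAliases() {}

      /** Pick the final alias for a projected column, preferring the window alias. */
      public static String resolve(String tmpColAlias, Map<String, String> windowToAlias) {
        if (tmpColAlias.startsWith("_c")) {
          return "_o_" + tmpColAlias;              // auto-generated name: make it unambiguous
        }
        if (windowToAlias != null && windowToAlias.containsKey(tmpColAlias)) {
          return windowToAlias.get(tmpColAlias);   // restore the window function's alias
        }
        return tmpColAlias;
      }

      public static void main(String[] args) {
        Map<String, String> windowToAlias = new HashMap<>();
        windowToAlias.put("(tok_function rank (tok_table_or_col p_mfgr))",  // hypothetical key
            "rank_window_0");
        // prints "rank_window_0"
        System.out.println(resolve("(tok_function rank (tok_table_or_col p_mfgr))", windowToAlias));
      }
    }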
@@ -2276,8 +2426,11 @@
        * the names so we don't run into this issue when converting back to
        * Hive AST.
        */
-      if (tmpColAlias.startsWith("_c"))
+      if (tmpColAlias.startsWith("_c")) {
         tmpColAlias = "_o_" + tmpColAlias;
+      } else if (windowToAlias != null && windowToAlias.containsKey(tmpColAlias)) {
+        tmpColAlias = windowToAlias.get(tmpColAlias);
+      }
       int suffix = 1;
       while (colNamesSet.contains(tmpColAlias)) {
         tmpColAlias = qualifiedColNames[1] + suffix;
@@ -2769,4 +2922,5 @@
     return tabAliases;
   }
 }
+
 }
Index: ql/.gitignore
===================================================================
--- ql/.gitignore (.../https://svn.apache.org/repos/asf/hive/trunk) (revision 1673613)
+++ ql/.gitignore (working copy)
@@ -1 +1,2 @@
 dependency-reduced-pom.xml
+/bin/

Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Reverse-merged /hive/branches/cbo:r1605012-1627125
   Merged /hive/trunk:r1605012-1673598