Index: metastore/bin/.gitignore
===================================================================
--- metastore/bin/.gitignore (revision 1674187)
+++ metastore/bin/.gitignore (working copy)
@@ -1 +1,2 @@
-# Dummy file to make Git recognize this empty directory
+/scripts/
+/src/
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1674187)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -703,7 +703,22 @@
     // CBO related
     HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."),
+    HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", false, "Flag to control Calcite plan to Hive operator conversion"),
+    HIVE_CBO_EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", false, "Flag to control enabling the extended cost model based on " +
+        "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."),
+    HIVE_CBO_COST_MODEL_CPU("hive.cbo.costmodel.cpu", "0.000001", "Default cost of a comparison"),
+    HIVE_CBO_COST_MODEL_NET("hive.cbo.costmodel.network", "150.0", "Default cost of transferring a byte over the network;" +
+        " expressed as a multiple of CPU cost"),
+    HIVE_CBO_COST_MODEL_LFS_WRITE("hive.cbo.costmodel.local.fs.write", "4.0", "Default cost of writing a byte to local FS;" +
+        " expressed as a multiple of NETWORK cost"),
+    HIVE_CBO_COST_MODEL_LFS_READ("hive.cbo.costmodel.local.fs.read", "4.0", "Default cost of reading a byte from local FS;" +
+        " expressed as a multiple of NETWORK cost"),
+    HIVE_CBO_COST_MODEL_HDFS_WRITE("hive.cbo.costmodel.hdfs.write", "10.0", "Default cost of writing a byte to HDFS;" +
+        " expressed as a multiple of Local FS write cost"),
+    HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" +
+        " expressed as a multiple of Local FS read cost"),
+
     // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
     // need to remove by hive .13.
Also, do not change default (see SMB operator) HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""), Index: ql/.gitignore =================================================================== --- ql/.gitignore (revision 1674187) +++ ql/.gitignore (working copy) @@ -1 +1,3 @@ dependency-reduced-pom.xml +/bin/ +/target/ Index: ql/src/test/results/clientpositive/spark/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join33.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/join33.q.out (working copy) @@ -113,16 +113,16 @@ Map 1 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,9 +136,12 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -146,11 +149,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -160,26 +165,23 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Map 3 Map Operator Tree: TableScan @@ -188,7 +190,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -197,7 
+199,7 @@ Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -258,24 +260,24 @@ Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -286,13 +288,13 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -328,12 +330,9 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -341,13 +340,11 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -357,23 +354,26 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-0 Move 
Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out (working copy) @@ -346,28 +346,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -767,7 +767,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -775,7 +775,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1296,28 +1296,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: 
GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1629,21 +1629,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1651,7 +1651,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1998,21 +1998,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -2020,7 +2020,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2954,7 +2954,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -2962,7 +2962,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 
_col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3277,28 +3277,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3601,28 +3601,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3995,28 +3995,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: 
int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4349,20 +4349,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4862,33 +4862,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -4896,7 +4896,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5450,14 +5450,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: 
_col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5898,28 +5898,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5969,15 +5969,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) @@ -5985,7 +5985,7 @@ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: false Reducer 5 Needs Tagging: false @@ -6008,35 +6008,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 
(type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6626,28 +6626,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7076,28 +7076,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7487,28 +7487,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum 
window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7949,28 +7949,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8420,28 +8420,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8850,28 +8850,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/spark/ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/ptf.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/ptf.q.out (working copy) @@ -93,28 +93,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -571,28 +571,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 
arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -748,21 +748,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -770,7 +770,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -950,21 +950,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -972,7 +972,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1440,7 +1440,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1448,7 +1448,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE 
Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1613,28 +1613,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1786,28 +1786,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2019,28 +2019,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 
(type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2202,20 +2202,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2414,33 +2414,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -2448,7 +2448,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2767,14 +2767,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: 
double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2954,28 +2954,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3006,22 +3006,22 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) Reducer 5 Reduce Operator Tree: Select Operator @@ -3042,35 +3042,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 
(type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3444,28 +3444,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3710,28 +3710,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3952,28 +3952,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window 
function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4231,28 +4231,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4492,28 +4492,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4740,28 +4740,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/spark/subquery_in.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/subquery_in.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out (working copy) @@ -327,7 +327,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -335,7 +335,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -495,7 +495,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -503,7 +503,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/spark/ptf_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/ptf_streaming.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/ptf_streaming.q.out (working copy) @@ -93,28 +93,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), 
rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -615,7 +615,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -623,7 +623,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -788,28 +788,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1021,28 +1021,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column 
stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1256,28 +1256,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1491,28 +1491,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1705,33 +1705,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window 
function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1739,7 +1739,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1980,28 +1980,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2246,28 +2246,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2496,28 +2496,28 @@ raw input shape: window 
functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/spark/join32_lessSize.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join32_lessSize.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out (working copy) @@ -121,16 +121,16 @@ Map 1 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -144,9 +144,12 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -154,11 +157,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -168,26 +173,23 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart 
+ partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Map 3 Map Operator Tree: TableScan @@ -196,7 +198,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -205,7 +207,7 @@ Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -266,24 +268,24 @@ Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -294,13 +296,13 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -336,12 +338,9 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -349,13 +348,11 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -365,23 +362,26 @@ input format: org.apache.hadoop.mapred.TextInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-0 Move Operator @@ -430,8 +430,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -613,35 +613,34 @@ STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-3 is a root stage Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 1 Local Work: Map Reduce Local Work @@ -650,7 +649,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -660,14 +659,14 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 
5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -680,44 +679,39 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /src [w] + Map 3 Map Operator Tree: TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col1 (type: string) - Position of Big Table: 1 + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -725,7 +719,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -735,14 +729,14 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -755,20 +749,20 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + 
name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [w] + /src1 [x] Map 4 Map Operator Tree: TableScan @@ -783,22 +777,11 @@ expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -875,57 +858,68 @@ keys: 0 _col0 (type: string) 1 _col1 (type: string) - outputColumnNames: _col1, _col4 + outputColumnNames: _col1 input vertices: - 1 Map 4 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col3, _col6 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col4 input vertices: - 0 Map 1 - Position of Big Table: 1 + 1 Map 4 + Position of Big Table: 0 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3, _col6 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string #### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 
1685 + name default.dest_j1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: Index: ql/src/test/results/clientpositive/spark/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join32.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/join32.q.out (working copy) @@ -113,16 +113,16 @@ Map 1 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,9 +136,12 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -146,11 +149,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -160,26 +165,23 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Map 3 Map Operator Tree: TableScan @@ -188,7 +190,7 @@ GatherStats: false Filter Operator isSamplingPred: false - 
predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -197,7 +199,7 @@ Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -258,24 +260,24 @@ Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -286,13 +288,13 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -328,12 +330,9 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -341,13 +340,11 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -357,23 +354,26 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-0 Move Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (working copy) @@ -390,9 +390,9 @@ Stage: Stage-1 Spark Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -401,69 +401,71 @@ alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: 
_col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -471,11 +473,27 @@ Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col5 (type: 
string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -485,38 +503,22 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -542,9 +544,9 @@ Stage: Stage-1 Spark Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -553,35 +555,36 @@ alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE 
Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -595,27 +598,28 @@ outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -623,11 +627,27 @@ Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 
_col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -637,38 +657,22 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out (working copy) @@ -185,11 +185,11 @@ (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 Where Clause SubQuery Joining Condition: - on part.p_size = sq_1._wcol0 + on part.p_size = sq_1.first_value_window_0 Rewritten Query: select p_mfgr, p_name, p_size -from part left semi join (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 on part.p_size = sq_1._wcol0 +from part left semi join (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 on part.p_size = sq_1.first_value_window_0 where 1 = 1 PREHOOK: query: -- non agg, non corr, with join in Parent Query explain rewrite Index: ql/src/test/results/clientpositive/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/join33.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/join33.q.out (working copy) @@ -109,25 +109,71 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:y + $hdt$_0:z Fetch Operator limit: -1 + Partition Description: + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:y + $hdt$_0:z TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -141,7 +187,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -150,31 +196,31 @@ HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 Statistics: 
Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -183,11 +229,11 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -356,7 +402,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator @@ -405,8 +451,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/ptf.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/ptf.q.out (working copy) @@ -97,28 +97,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -313,7 +313,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -321,7 +321,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: 
string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -585,28 +585,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -766,21 +766,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -788,7 +788,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -982,21 +982,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -1004,7 +1004,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), 
_wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1480,7 +1480,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1488,7 +1488,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1657,28 +1657,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1834,28 +1834,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2081,28 +2081,28 @@ raw 
input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2268,20 +2268,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2492,33 +2492,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -2526,7 +2526,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 
(type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2853,14 +2853,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3075,28 +3075,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3150,15 +3150,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3176,7 +3176,7 @@ sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -3196,35 +3196,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3551,28 +3551,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3841,28 +3841,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 
(type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4097,28 +4097,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4400,28 +4400,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4675,28 +4675,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 
(type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4937,28 +4937,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/subquery_in.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_in.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/subquery_in.q.out (working copy) @@ -278,7 +278,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -286,7 +286,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -458,7 +458,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -466,7 +466,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/subquery_in_having.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_in_having.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/subquery_in_having.q.out (working copy) @@ -1357,17 +1357,17 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: 
first_value_window_0 arguments: _col1 name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(MAX)~ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _wcol0 is not null (type: boolean) + predicate: first_value_window_0 is not null (type: boolean) Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _wcol0 (type: string) + expressions: first_value_window_0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Group By Operator Index: ql/src/test/results/clientpositive/tez/cbo_join.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/cbo_join.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/tez/cbo_join.q.out (working copy) @@ -1,4 +1,5 @@ -PREHOOK: query: -- 4. Test Select + Join + TS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6,7 +7,8 @@ PREHOOK: Input: default@cbo_t2 PREHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: -- 4. Test Select + Join + TS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 @@ -122,126 +124,46 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 1 1 1 @@ -282,46 +204,6 @@ 1 1 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 1 1 1 @@ -522,6 +404,126 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -632,8 +634,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -730,6 +730,8 @@ 1 1 1 1 1 1 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: 
QUERY PREHOOK: Input: default@cbo_t1 @@ -744,8 +746,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -847,6 +847,8 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -861,10 +863,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -966,6 +964,10 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL +NULL NULL +NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5334,8 +5336,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -5870,6 +5870,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6430,8 +6432,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -6966,6 +6966,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: -- 5. 
Test Select + Join + FIL + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0) PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/tez/ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/ptf.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/tez/ptf.q.out (working copy) @@ -93,28 +93,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -571,28 +571,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE 
Column stats: NONE File Output Operator @@ -748,21 +748,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -770,7 +770,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -950,21 +950,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -972,7 +972,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1440,7 +1440,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1448,7 +1448,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1613,28 +1613,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true 
window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1786,28 +1786,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2019,28 +2019,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2202,20 +2202,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window 
frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2414,33 +2414,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -2448,7 +2448,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2767,14 +2767,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2986,28 +2986,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: 
true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3038,22 +3038,22 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) Reducer 5 Reduce Operator Tree: Select Operator @@ -3074,35 +3074,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3431,28 +3431,28 @@ raw input shape: window functions: window 
function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3697,28 +3697,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3939,28 +3939,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ 
-4218,28 +4218,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4479,28 +4479,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4727,28 +4727,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), 
sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/tez/explainuser_1.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out (working copy) @@ -3665,7 +3665,7 @@ outputColumnNames:["_col0"] Statistics:Num rows: 13 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator [FIL_26] - predicate:_wcol0 is not null (type: boolean) + predicate:first_value_window_0 is not null (type: boolean) Statistics:Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator [PTF_11] Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}] @@ -7596,9 +7596,9 @@ Map-reduce partition columns:_col2 (type: string) sort order:++ Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - value expressions:_wcol0 (type: bigint), _col5 (type: int) + value expressions:sum_window_0 (type: bigint), _col5 (type: int) Select Operator [SEL_13] - outputColumnNames:["_col1","_col2","_col5","_wcol0"] + outputColumnNames:["_col1","_col2","_col5","sum_window_0"] Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE PTF Operator [PTF_12] Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}] Index: ql/src/test/results/clientpositive/tez/subquery_in.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/subquery_in.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/tez/subquery_in.q.out (working copy) @@ -335,7 +335,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -343,7 +343,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -507,7 +507,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -515,7 +515,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out (working copy) @@ -346,28 +346,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + 
alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -768,7 +768,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -776,7 +776,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1297,28 +1297,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1630,21 +1630,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 
arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1652,7 +1652,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1999,21 +1999,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -2021,7 +2021,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2957,7 +2957,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -2965,7 +2965,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3280,28 +3280,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: 
int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3604,28 +3604,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3998,28 +3998,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4352,20 +4352,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4866,33 +4866,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -4900,7 +4900,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5454,14 +5454,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5939,28 +5939,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: 
double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6010,15 +6010,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) @@ -6026,7 +6026,7 @@ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: true Reducer 5 Needs Tagging: false @@ -6049,35 +6049,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6616,28 +6616,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: 
GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7066,28 +7066,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7477,28 +7477,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7939,28 +7939,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: 
_wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8410,28 +8410,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8840,28 +8840,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/tez/ptf_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/ptf_streaming.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/tez/ptf_streaming.q.out (working copy) @@ -93,28 +93,28 @@ raw input 
shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -297,7 +297,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -305,7 +305,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -615,7 +615,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -623,7 +623,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -788,28 +788,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), 
dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1021,28 +1021,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1256,28 +1256,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1491,28 +1491,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: 
string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1705,33 +1705,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1739,7 +1739,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1980,28 +1980,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2246,28 +2246,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: 
dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2496,28 +2496,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/tez/explainuser_2.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/explainuser_2.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out (working copy) @@ -53,11 +53,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@ss POSTHOOK: Lineage: ss.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] 
PREHOOK: query: INSERT OVERWRITE TABLE sr SELECT x.key,x.value,y.key,y.value,z.key,z.value FROM src1 x @@ -81,11 +81,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@sr POSTHOOK: Lineage: sr.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sr.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sr.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: sr.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: sr.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: sr.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: INSERT OVERWRITE TABLE cs SELECT x.key,x.value,y.key,y.value,z.key,z.value FROM src1 x @@ -195,7 +195,7 @@ Merge Join Operator [MERGEJOIN_29] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col5"] + | outputColumnNames:["_col0","_col4","_col5"] | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] | Reduce Output Operator [RS_14] @@ -203,15 +203,14 @@ | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_1] - | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_25] - | predicate:key is not null (type: boolean) + | predicate:value is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_0] - | alias:y + | alias:z | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Reducer 4 [SIMPLE_EDGE] Reduce Output Operator [RS_16] @@ -219,11 +218,11 @@ Map-reduce partition columns:_col3 (type: string) sort order:+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions:_col0 (type: string) + value expressions:_col1 (type: string), _col2 (type: string) Merge Join Operator [MERGEJOIN_28] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col0","_col3"] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE |<-Map 3 [SIMPLE_EDGE] | Reduce Output Operator [RS_8] @@ -231,27 +230,28 @@ | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) | Select Operator [SEL_4] - | outputColumnNames:["_col0"] + | outputColumnNames:["_col0","_col1"] | 
Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_26] - | predicate:value is not null (type: boolean) + | predicate:key is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:z + | TableScan [TS_3] + | alias:y | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Map 5 [SIMPLE_EDGE] Reduce Output Operator [RS_10] - key expressions:_col1 (type: string) - Map-reduce partition columns:_col1 (type: string) + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) sort order:+ Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions:_col0 (type: string) + value expressions:_col1 (type: string) Select Operator [SEL_6] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Filter Operator [FIL_27] - predicate:(value is not null and key is not null) (type: boolean) + predicate:(key is not null and value is not null) (type: boolean) Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE TableScan [TS_5] alias:x @@ -315,21 +315,21 @@ Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 9 <- Reducer 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 + Reducer 6 File Output Operator [FS_71] compressed:false Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE @@ -339,236 +339,236 @@ Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_69] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 4 [SIMPLE_EDGE] + | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] Reduce Output Operator [RS_68] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_66] | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [SIMPLE_EDGE] + | Statistics:Num 
rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] Reduce Output Operator [RS_65] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_64] aggregations:["count(_col3)","count(_col4)","count(_col5)"] keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_62] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_113] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_111] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col15 (type: string), _col17 (type: string)","0":"_col1 (type: string), _col3 (type: string)"} - | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] - | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_58] - | key expressions:_col1 (type: string), _col3 (type: string) - | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | keys:{"1":"_col8 (type: string), _col10 (type: string)","0":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 12 [SIMPLE_EDGE] + | Reduce Output Operator [RS_60] + | key expressions:_col8 (type: string), _col10 (type: string) + | Map-reduce partition columns:_col8 (type: string), _col10 (type: string) | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col2 (type: string) - | Merge Join Operator [MERGEJOIN_107] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 1 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_53] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_99] - | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:cs - | | Statistics:Num rows: 170 Data size: 5890 
Basic stats: COMPLETE Column stats: NONE - | |<-Map 6 [SIMPLE_EDGE] - | Reduce Output Operator [RS_55] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_4] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 9 [SIMPLE_EDGE] - Reduce Output Operator [RS_60] - key expressions:_col15 (type: string), _col17 (type: string) - Map-reduce partition columns:_col15 (type: string), _col17 (type: string) + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col6 (type: string), _col7 (type: string) + | Select Operator [SEL_46] + | outputColumnNames:["_col10","_col6","_col7","_col8"] + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_109] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col6","_col7","_col8","_col10"] + | | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + | |<-Map 11 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_42] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_19] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_101] + | | predicate:((key = 'src1key') and value is not null) (type: boolean) + | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_17] + | | alias:src1 + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 14 [SIMPLE_EDGE] + | Reduce Output Operator [RS_44] + | key expressions:_col5 (type: string) + | Map-reduce partition columns:_col5 (type: string) + | sort order:+ + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col4 (type: string), _col6 (type: string), _col8 (type: string) + | Merge Join Operator [MERGEJOIN_108] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col4","_col5","_col6","_col8"] + | | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + | |<-Map 13 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_22] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_102] + | | predicate:((value = 'd1value') and key is not null) (type: boolean) + | | Statistics:Num rows: 125 Data 
size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_20] + | | alias:d1 + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 16 [SIMPLE_EDGE] + | Reduce Output Operator [RS_38] + | key expressions:_col2 (type: string) + | Map-reduce partition columns:_col2 (type: string) + | sort order:+ + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) + | Merge Join Operator [MERGEJOIN_107] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col4","_col6"] + | | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + | |<-Map 15 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_30] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_25] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_103] + | | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_23] + | | alias:srcpart + | | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + | |<-Map 17 [SIMPLE_EDGE] + | Reduce Output Operator [RS_32] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 (type: string) + | sort order:+ + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + | Select Operator [SEL_28] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_104] + | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_26] + | alias:ss + | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_58] + key expressions:_col8 (type: string), _col10 (type: string) + Map-reduce partition columns:_col8 (type: string), _col10 (type: string) sort order:++ - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - value expressions:_col6 (type: string), _col7 (type: string), _col14 (type: string) - Select Operator [SEL_51] - outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_112] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col2 (type: string), _col4 (type: string)","0":"_col8 (type: string), _col10 (type: string)"} - | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 16 [SIMPLE_EDGE] - | Reduce Output Operator [RS_49] - | key expressions:_col2 (type: string), _col4 (type: string) - | 
Map-reduce partition columns:_col2 (type: string), _col4 (type: string) - | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col3 (type: string), _col5 (type: string) - | Merge Join Operator [MERGEJOIN_111] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 15 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_36] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - | | Select Operator [SEL_31] - | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_105] - | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_29] - | | alias:sr - | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | |<-Map 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_38] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_34] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_106] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_32] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_47] - key expressions:_col8 (type: string), _col10 (type: string) - Map-reduce partition columns:_col8 (type: string), _col10 (type: string) - sort order:++ - Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - value expressions:_col6 (type: string), _col7 (type: string) - Merge Join Operator [MERGEJOIN_110] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"} - | outputColumnNames:["_col6","_col7","_col8","_col10"] - | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - |<-Map 7 [SIMPLE_EDGE] - | Reduce Output Operator [RS_42] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_7] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_101] - | predicate:((key = 'src1key') and value is not null) (type: boolean) - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_5] - | alias:src1 - | Statistics:Num rows: 25 Data size: 191 Basic 
stats: COMPLETE Column stats: NONE - |<-Reducer 11 [SIMPLE_EDGE] - Reduce Output Operator [RS_44] - key expressions:_col5 (type: string) - Map-reduce partition columns:_col5 (type: string) - sort order:+ - Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions:_col4 (type: string), _col6 (type: string), _col8 (type: string) - Merge Join Operator [MERGEJOIN_109] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col4","_col5","_col6","_col8"] - | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_24] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_102] - | predicate:((value = 'd1value') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_8] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 13 [SIMPLE_EDGE] - Reduce Output Operator [RS_26] - key expressions:_col2 (type: string) - Map-reduce partition columns:_col2 (type: string) - sort order:+ - Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) - Merge Join Operator [MERGEJOIN_108] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col6"] - | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - |<-Map 12 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_13] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_103] - | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_11] - | alias:srcpart - | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - |<-Map 14 [SIMPLE_EDGE] - Reduce Output Operator [RS_20] - key expressions:_col3 (type: string) - Map-reduce partition columns:_col3 (type: string) - sort order:+ - Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - Select Operator [SEL_16] - outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] - Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_104] - predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) - Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_14] - alias:ss - Statistics:Num 
rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string) + Merge Join Operator [MERGEJOIN_110] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col3 (type: string), _col5 (type: string)","0":"_col1 (type: string), _col3 (type: string)"} + | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"] + | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_53] + | key expressions:_col1 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | sort order:++ + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col2 (type: string) + | Merge Join Operator [MERGEJOIN_105] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_48] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_97] + | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:cs + | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + | |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_50] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_4] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_98] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_55] + key expressions:_col3 (type: string), _col5 (type: string) + Map-reduce partition columns:_col3 (type: string), _col5 (type: string) + sort order:++ + Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: string), _col4 (type: string) + Merge Join Operator [MERGEJOIN_106] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_14] + | key expressions:_col0 
(type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_100] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_8] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Select Operator [SEL_7] + outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_99] + predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean) + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_5] + alias:sr + Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -590,33 +590,33 @@ Plan optimized by CBO. Vertex dependency in root stage -Reducer 13 <- Union 12 (SIMPLE_EDGE) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS) -Map 11 <- Union 12 (CONTAINS) -Map 1 <- Union 2 (CONTAINS) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) -Map 7 <- Union 2 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE) -Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Map 15 <- Union 12 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 13 <- Union 14 (CONTAINS) +Map 5 <- Union 6 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Map 9 <- Union 6 (CONTAINS) +Reducer 8 <- Map 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 15 <- Union 14 (SIMPLE_EDGE) +Reducer 16 <- Map 18 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Map 17 <- Union 14 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 + Reducer 4 File Output Operator [FS_61] compressed:false - Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Group By Operator [GBY_59] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - |<-Union 5 [SIMPLE_EDGE] - |<-Reducer 14 [CONTAINS] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE + |<-Union 3 [SIMPLE_EDGE] + |<-Reducer 2 [CONTAINS] | Reduce Output Operator [RS_58] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -624,98 +624,98 @@ | Group By Operator [GBY_57] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_53] + | Select Operator [SEL_26] | outputColumnNames:["_col0","_col1"] | Merge Join Operator [MERGEJOIN_85] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col2"] - | |<-Reducer 13 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_49] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2"] + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_22] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_37] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_1] | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_36] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 12 [SIMPLE_EDGE] - | | |<-Map 11 [CONTAINS] - | | | Reduce Output Operator [RS_35] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_34] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_28] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_78] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_27] - | | | alias:x - | | |<-Map 15 [CONTAINS] - | | Reduce Output Operator [RS_35] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_34] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_30] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_79] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_29] - | | alias:y - | |<-Reducer 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_51] - | key expressions:_col2 (type: string) - | Map-reduce partition columns:_col2 (type: string) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_76] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column 
stats: NONE - | value expressions:_col1 (type: string) - | Merge Join Operator [MERGEJOIN_83] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Merge Join Operator [MERGEJOIN_84] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2"] - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Map 16 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_43] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 10 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_39] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_80] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_38] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Map 18 [SIMPLE_EDGE] - | Reduce Output Operator [RS_45] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_14] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_79] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_13] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_16] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_41] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_81] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_40] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 4 [CONTAINS] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_12] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_11] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 6 [SIMPLE_EDGE] + | |<-Map 5 [CONTAINS] + | | Reduce Output Operator 
[RS_10] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_9] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_3] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_77] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_2] + | | alias:x + | |<-Map 9 [CONTAINS] + | Reduce Output Operator [RS_10] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_9] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_78] + | predicate:value is not null (type: boolean) + | TableScan [TS_4] + | alias:y + |<-Reducer 12 [CONTAINS] Reduce Output Operator [RS_58] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -723,97 +723,97 @@ Group By Operator [GBY_57] keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Select Operator [SEL_26] + Select Operator [SEL_53] outputColumnNames:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_84] + Merge Join Operator [MERGEJOIN_87] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col0","_col2"] - |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2"] + |<-Map 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_49] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_28] | outputColumnNames:["_col0"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_74] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 7 [CONTAINS] - | Reduce Output Operator [RS_8] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_75] - | 
predicate:value is not null (type: boolean) - | TableScan [TS_2] - | alias:y - |<-Reducer 9 [SIMPLE_EDGE] - Reduce Output Operator [RS_24] - key expressions:_col2 (type: string) - Map-reduce partition columns:_col2 (type: string) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_80] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_27] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 16 [SIMPLE_EDGE] + Reduce Output Operator [RS_51] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) sort order:+ - Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string) - Merge Join Operator [MERGEJOIN_82] + Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string) + Merge Join Operator [MERGEJOIN_86] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2"] - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + |<-Map 18 [SIMPLE_EDGE] + | Reduce Output Operator [RS_45] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_14] + | value expressions:_col0 (type: string) + | Select Operator [SEL_41] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_77] - | predicate:(key is not null and value is not null) (type: boolean) + | Filter Operator [FIL_83] + | predicate:(value is not null and key is not null) (type: boolean) | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_13] + | TableScan [TS_40] | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_16] + |<-Reducer 15 [SIMPLE_EDGE] + Reduce Output Operator [RS_43] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_12] + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_39] outputColumnNames:["_col0"] - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_76] - predicate:key is not null (type: boolean) - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_11] - alias:y - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_38] + | keys:KEY._col0 
(type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + |<-Union 14 [SIMPLE_EDGE] + |<-Map 13 [CONTAINS] + | Reduce Output Operator [RS_37] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_36] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_30] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_81] + | predicate:value is not null (type: boolean) + | TableScan [TS_29] + | alias:x + |<-Map 17 [CONTAINS] + Reduce Output Operator [RS_37] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_36] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_32] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_82] + predicate:value is not null (type: boolean) + TableScan [TS_31] + alias:y PREHOOK: query: explain SELECT x.key, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -843,45 +843,45 @@ Plan optimized by CBO. Vertex dependency in root stage -Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Map 24 <- Union 25 (CONTAINS) -Map 32 <- Union 25 (CONTAINS) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 30 <- Union 29 (SIMPLE_EDGE) -Map 13 <- Union 14 (CONTAINS) -Map 34 <- Union 29 (CONTAINS) -Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) -Map 1 <- Union 2 (CONTAINS) -Map 20 <- Union 16 (CONTAINS) -Map 33 <- Union 27 (CONTAINS) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Map 19 <- Union 14 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS) -Reducer 17 <- Union 16 (SIMPLE_EDGE) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 32 <- Union 31 (SIMPLE_EDGE) +Map 11 <- Union 8 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 25 <- Map 24 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 22 <- Union 18 (CONTAINS) +Map 21 <- Union 16 (CONTAINS) +Map 34 <- Union 27 (CONTAINS) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 33 <- Map 37 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Map 36 <- Union 31 (CONTAINS) +Map 35 <- Union 29 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 19 <- Union 18 (SIMPLE_EDGE) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 9 <- Union 8 (SIMPLE_EDGE) +Reducer 17 <- Union 16 (SIMPLE_EDGE), Union 18 (CONTAINS) +Map 15 <- Union 16 (CONTAINS) Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) -Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Map 9 <- Union 2 (CONTAINS) +Map 26 <- Union 27 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 7 <- Union 8 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 + Reducer 6 File 
Output Operator [FS_122] compressed:false - Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Group By Operator [GBY_120] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - |<-Union 7 [SIMPLE_EDGE] - |<-Reducer 31 [CONTAINS] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 25 [CONTAINS] | Reduce Output Operator [RS_119] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -891,149 +891,148 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_114] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_164] + | Merge Join Operator [MERGEJOIN_170] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Reducer 30 [SIMPLE_EDGE] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 24 [SIMPLE_EDGE] | | Reduce Output Operator [RS_110] | | key expressions:_col0 (type: string) | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_98] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_97] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 29 [SIMPLE_EDGE] - | | |<-Map 34 [CONTAINS] - | | | Reduce Output Operator [RS_96] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_95] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_91] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_156] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_90] - | | | alias:y - | | |<-Reducer 28 [CONTAINS] - | | Reduce Output Operator [RS_96] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_95] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Group By Operator [GBY_88] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | |<-Union 27 [SIMPLE_EDGE] - | | |<-Map 33 [CONTAINS] - | | | Reduce Output Operator [RS_87] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_86] - | 
| | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_82] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_155] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_81] - | | | alias:y - | | |<-Reducer 26 [CONTAINS] - | | Reduce Output Operator [RS_87] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_86] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Group By Operator [GBY_79] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | |<-Union 25 [SIMPLE_EDGE] - | | |<-Map 24 [CONTAINS] - | | | Reduce Output Operator [RS_78] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_77] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_71] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_153] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_70] - | | | alias:x - | | |<-Map 32 [CONTAINS] - | | Reduce Output Operator [RS_78] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_77] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_73] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_154] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_72] - | | alias:y - | |<-Reducer 36 [SIMPLE_EDGE] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_71] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_159] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_70] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 33 [SIMPLE_EDGE] | Reduce Output Operator [RS_112] - | key expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string), _col2 (type: string) - | Merge Join Operator [MERGEJOIN_161] + | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_169] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Map 35 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_104] - | | key expressions:_col0 (type: string) - | | Map-reduce partition 
columns:_col0 (type: string) + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | |<-Map 37 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_106] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_100] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_102] | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_157] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_99] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Map 37 [SIMPLE_EDGE] - | Reduce Output Operator [RS_106] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_164] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_101] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 32 [SIMPLE_EDGE] + | Reduce Output Operator [RS_104] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_102] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_158] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_101] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 6 [CONTAINS] + | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_100] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_99] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | |<-Union 31 [SIMPLE_EDGE] + | |<-Reducer 30 [CONTAINS] + | | Reduce Output Operator [RS_98] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_97] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Group By Operator [GBY_90] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | |<-Union 29 [SIMPLE_EDGE] + | | |<-Map 35 [CONTAINS] + | | | Reduce Output Operator [RS_89] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | 
Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_88] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_84] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_162] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_83] + | | | alias:y + | | |<-Reducer 28 [CONTAINS] + | | Reduce Output Operator [RS_89] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_88] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Group By Operator [GBY_81] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | |<-Union 27 [SIMPLE_EDGE] + | | |<-Map 34 [CONTAINS] + | | | Reduce Output Operator [RS_80] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_79] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_75] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_161] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_74] + | | | alias:y + | | |<-Map 26 [CONTAINS] + | | Reduce Output Operator [RS_80] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_79] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_73] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_160] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_72] + | | alias:x + | |<-Map 36 [CONTAINS] + | Reduce Output Operator [RS_98] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_97] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_93] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_163] + | predicate:value is not null (type: boolean) + | TableScan [TS_92] + | alias:y + |<-Reducer 4 [CONTAINS] Reduce Output Operator [RS_119] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1044,8 +1043,8 @@ Group By Operator [GBY_68] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - |<-Union 5 [SIMPLE_EDGE] - |<-Reducer 4 [CONTAINS] + |<-Union 3 [SIMPLE_EDGE] + |<-Reducer 14 [CONTAINS] | Reduce Output Operator [RS_67] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1053,99 +1052,124 @@ | Group By Operator [GBY_66] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_26] + | Select Operator [SEL_62] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_162] + | Merge Join Operator [MERGEJOIN_168] | | condition 
map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Reducer 11 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_24] - | | key expressions:_col3 (type: string) - | | Map-reduce partition columns:_col3 (type: string) + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 13 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_58] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string) - | | Merge Join Operator [MERGEJOIN_159] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | | outputColumnNames:["_col1","_col2","_col3"] - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 10 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_16] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_12] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_146] - | | | predicate:key is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_11] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 12 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_18] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_14] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_147] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_13] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_28] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_154] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_27] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 20 [SIMPLE_EDGE] + | Reduce Output Operator [RS_60] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | 
sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_144] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 9 [CONTAINS] - | Reduce Output Operator [RS_8] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_145] - | predicate:value is not null (type: boolean) - | TableScan [TS_2] - | alias:y - |<-Reducer 18 [CONTAINS] + | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_167] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | |<-Map 23 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_54] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_50] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_158] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_49] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 19 [SIMPLE_EDGE] + | Reduce Output Operator [RS_52] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_48] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_47] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | |<-Union 18 [SIMPLE_EDGE] + | |<-Map 22 [CONTAINS] + | | Reduce Output Operator [RS_46] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition 
columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_45] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_41] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_157] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_40] + | | alias:y + | |<-Reducer 17 [CONTAINS] + | Reduce Output Operator [RS_46] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_45] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_38] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 16 [SIMPLE_EDGE] + | |<-Map 21 [CONTAINS] + | | Reduce Output Operator [RS_37] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_36] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_32] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_156] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_31] + | | alias:y + | |<-Map 15 [CONTAINS] + | Reduce Output Operator [RS_37] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_36] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_30] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_155] + | predicate:value is not null (type: boolean) + | TableScan [TS_29] + | alias:x + |<-Reducer 2 [CONTAINS] Reduce Output Operator [RS_67] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1153,124 +1177,97 @@ Group By Operator [GBY_66] keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Select Operator [SEL_62] + Select Operator [SEL_26] outputColumnNames:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_163] + Merge Join Operator [MERGEJOIN_166] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col2","_col3"] - |<-Reducer 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_58] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3"] + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_22] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_46] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_45] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | |<-Union 16 [SIMPLE_EDGE] - | |<-Map 20 [CONTAINS] - | | Reduce Output Operator [RS_44] - | | key expressions:_col0 (type: string), _col1 (type: 
string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_43] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_39] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_150] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_38] - | | alias:y - | |<-Reducer 15 [CONTAINS] - | Reduce Output Operator [RS_44] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_43] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Group By Operator [GBY_36] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | |<-Union 14 [SIMPLE_EDGE] - | |<-Map 13 [CONTAINS] - | | Reduce Output Operator [RS_35] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_34] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_28] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_148] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_27] - | | alias:x - | |<-Map 19 [CONTAINS] - | Reduce Output Operator [RS_35] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_34] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_30] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_149] - | predicate:value is not null (type: boolean) - | TableScan [TS_29] - | alias:y - |<-Reducer 22 [SIMPLE_EDGE] - Reduce Output Operator [RS_60] - key expressions:_col3 (type: string) - Map-reduce partition columns:_col3 (type: string) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_150] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 10 [SIMPLE_EDGE] + Reduce Output Operator [RS_24] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) sort order:+ - Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string), _col2 (type: string) - Merge Join Operator [MERGEJOIN_160] + Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_165] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3"] - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Map 21 [SIMPLE_EDGE] - | Reduce Output Operator [RS_52] - | key expressions:_col0 (type: string) - | 
Map-reduce partition columns:_col0 (type: string) + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + |<-Map 12 [SIMPLE_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_48] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_14] | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_151] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_47] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 23 [SIMPLE_EDGE] - Reduce Output Operator [RS_54] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_153] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_13] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_16] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string) - Select Operator [SEL_50] - outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_152] - predicate:(key is not null and value is not null) (type: boolean) - Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_49] - alias:x - Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_12] + outputColumnNames:["_col0"] + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_11] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + |<-Union 8 [SIMPLE_EDGE] + |<-Map 11 [CONTAINS] + | Reduce Output Operator [RS_10] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_9] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_152] + | predicate:value is not null (type: boolean) + | TableScan [TS_4] + | alias:y + |<-Map 7 [CONTAINS] + Reduce Output Operator [RS_10] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_9] + keys:_col0 (type: string), _col1 (type: string) + 
outputColumnNames:["_col0","_col1"] + Select Operator [SEL_3] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_151] + predicate:value is not null (type: boolean) + TableScan [TS_2] + alias:x PREHOOK: query: EXPLAIN SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -1301,7 +1298,7 @@ Map Join Operator [MAPJOIN_29] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col3 (type: string)"} - | outputColumnNames:["_col1","_col2","_col5"] + | outputColumnNames:["_col0","_col4","_col5"] | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [BROADCAST_EDGE] | Reduce Output Operator [RS_14] @@ -1309,45 +1306,44 @@ | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_1] - | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_25] - | predicate:key is not null (type: boolean) + | predicate:value is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_0] - | alias:y + | alias:z | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Map Join Operator [MAPJOIN_28] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col1 (type: string)"} - | outputColumnNames:["_col0","_col3"] + | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE |<-Map 3 [BROADCAST_EDGE] | Reduce Output Operator [RS_10] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string) + | value expressions:_col1 (type: string) | Select Operator [SEL_6] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_27] - | predicate:(value is not null and key is not null) (type: boolean) + | predicate:(key is not null and value is not null) (type: boolean) | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_5] | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE |<-Select Operator [SEL_4] - outputColumnNames:["_col0"] + outputColumnNames:["_col0","_col1"] Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator [FIL_26] - predicate:value is not null (type: boolean) + predicate:key is not null (type: boolean) Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_2] - alias:z + TableScan [TS_3] + alias:y Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: EXPLAIN select @@ -1408,17 +1404,17 @@ Plan optimized by CBO. 
Vertex dependency in root stage -Map 2 <- Map 1 (BROADCAST_EDGE) -Map 10 <- Map 9 (BROADCAST_EDGE) -Map 5 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) +Map 4 <- Map 3 (BROADCAST_EDGE) +Map 7 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 + Reducer 9 File Output Operator [FS_71] compressed:false Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE @@ -1428,190 +1424,190 @@ Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_69] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 6 [SIMPLE_EDGE] + | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 8 [SIMPLE_EDGE] Reduce Output Operator [RS_68] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_66] | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Map 5 [SIMPLE_EDGE] + | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] Reduce Output Operator [RS_65] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_64] aggregations:["count(_col3)","count(_col4)","count(_col5)"] keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_62] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_113] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_111] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 5":"_col15 (type: string), _col17 (type: string)"} - | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] - | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE 
Column stats: NONE + | keys:{"Map 2":"_col8 (type: string), _col10 (type: string)","Map 7":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE |<-Map 2 [BROADCAST_EDGE] | Reduce Output Operator [RS_58] - | key expressions:_col1 (type: string), _col3 (type: string) - | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | key expressions:_col8 (type: string), _col10 (type: string) + | Map-reduce partition columns:_col8 (type: string), _col10 (type: string) | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col2 (type: string) - | Map Join Operator [MAPJOIN_107] + | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string) + | Map Join Operator [MAPJOIN_110] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 1 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_53] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_99] - | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:cs - | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_4] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 4":"_col3 (type: string), _col5 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"] + | | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + | |<-Map 4 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_55] + | | key expressions:_col3 (type: string), _col5 (type: string) + | | Map-reduce partition columns:_col3 (type: string), _col5 (type: string) + | | sort order:++ + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col2 (type: string), _col4 (type: string) + | | Map Join Operator [MAPJOIN_106] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 3":"_col0 (type: string)","Map 4":"_col0 (type: string)"} + | | | outputColumnNames:["_col2","_col3","_col4","_col5"] + | | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 3 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_12] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition 
columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | | Select Operator [SEL_7] + | | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_99] + | | | predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_5] + | | | alias:sr + | | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_10] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_100] + | | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_8] + | | alias:d1 + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map Join Operator [MAPJOIN_105] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_48] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_97] + | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:cs + | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_4] + | outputColumnNames:["_col0"] | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_51] - outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_112] + | Filter Operator [FIL_98] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_46] + outputColumnNames:["_col10","_col6","_col7","_col8"] + Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_109] | condition map:[{"":"Inner Join 0 to 
1"}] - | keys:{"Map 10":"_col2 (type: string), _col4 (type: string)","Map 5":"_col8 (type: string), _col10 (type: string)"} - | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [BROADCAST_EDGE] - | Reduce Output Operator [RS_49] - | key expressions:_col2 (type: string), _col4 (type: string) - | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) - | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col3 (type: string), _col5 (type: string) - | Map Join Operator [MAPJOIN_111] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 10":"_col0 (type: string)","Map 9":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 9 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_36] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - | | Select Operator [SEL_31] - | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_105] - | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_29] - | | alias:sr - | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_34] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_106] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_32] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_110] + | keys:{"Map 5":"_col1 (type: string)","Map 7":"_col5 (type: string)"} + | outputColumnNames:["_col6","_col7","_col8","_col10"] + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + |<-Map 5 [BROADCAST_EDGE] + | Reduce Output Operator [RS_42] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_19] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_101] + | predicate:((key = 'src1key') and value is not null) (type: boolean) + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_17] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_108] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 3":"_col1 (type: string)","Map 5":"_col5 (type: string)"} - | 
outputColumnNames:["_col6","_col7","_col8","_col10"] - | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - |<-Map 3 [BROADCAST_EDGE] - | Reduce Output Operator [RS_42] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) + | keys:{"Map 7":"_col2 (type: string)","Map 6":"_col0 (type: string)"} + | outputColumnNames:["_col4","_col5","_col6","_col8"] + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + |<-Map 6 [BROADCAST_EDGE] + | Reduce Output Operator [RS_36] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_7] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_101] - | predicate:((key = 'src1key') and value is not null) (type: boolean) - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_5] - | alias:src1 - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_109] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_22] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_102] + | predicate:((value = 'd1value') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_20] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_107] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 5":"_col2 (type: string)","Map 4":"_col0 (type: string)"} - | outputColumnNames:["_col4","_col5","_col6","_col8"] - | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - |<-Map 4 [BROADCAST_EDGE] - | Reduce Output Operator [RS_24] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | keys:{"Map 10":"_col3 (type: string)","Map 7":"_col1 (type: string)"} + | outputColumnNames:["_col2","_col3","_col4","_col6"] + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [BROADCAST_EDGE] + | Reduce Output Operator [RS_32] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 (type: string) | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_102] - | predicate:((value = 'd1value') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_8] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_108] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 5":"_col1 (type: string)","Map 8":"_col3 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col6"] - | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - |<-Map 8 [BROADCAST_EDGE] - | Reduce Output Operator [RS_20] - | key 
expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) - | sort order:+ - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - | Select Operator [SEL_16] - | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_104] - | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_14] - | alias:ss - | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_13] - outputColumnNames:["_col1"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + | Select Operator [SEL_28] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_104] + | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_26] + | alias:ss + | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_25] + outputColumnNames:["_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_103] + predicate:((key = 'srcpartkey') and value is not null) (type: boolean) Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_103] - predicate:((key = 'srcpartkey') and value is not null) (type: boolean) - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_11] - alias:srcpart - Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_23] + alias:srcpart + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -1633,31 +1629,31 @@ Plan optimized by CBO. 
Vertex dependency in root stage -Map 12 <- Union 10 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 11 <- Union 10 (SIMPLE_EDGE) -Map 13 <- Map 14 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Union 6 (CONTAINS) -Map 1 <- Union 2 (CONTAINS) -Map 5 <- Map 8 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS) -Map 4 <- Union 2 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) -Map 9 <- Union 10 (CONTAINS) +Reducer 12 <- Map 14 (BROADCAST_EDGE), Union 11 (SIMPLE_EDGE) +Map 13 <- Union 11 (CONTAINS) +Map 1 <- Reducer 6 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 10 <- Union 11 (CONTAINS) +Map 4 <- Union 5 (CONTAINS) +Map 7 <- Union 5 (CONTAINS) +Reducer 6 <- Map 8 (BROADCAST_EDGE), Union 5 (SIMPLE_EDGE) +Map 9 <- Reducer 12 (BROADCAST_EDGE), Union 2 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 + Reducer 3 File Output Operator [FS_61] compressed:false - Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Group By Operator [GBY_59] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - |<-Union 6 [SIMPLE_EDGE] - |<-Map 13 [CONTAINS] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] | Reduce Output Operator [RS_58] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1665,81 +1661,83 @@ | Group By Operator [GBY_57] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_53] + | Select Operator [SEL_26] | outputColumnNames:["_col0","_col1"] | Map Join Operator [MAPJOIN_85] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Reducer 11":"_col0 (type: string)","Map 13":"_col2 (type: string)"} - | | outputColumnNames:["_col0","_col2"] - | |<-Reducer 11 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_49] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | keys:{"Map 1":"_col0 (type: string)","Reducer 6":"_col1 (type: string)"} + | | outputColumnNames:["_col1","_col2"] + | |<-Reducer 6 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_24] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_37] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_36] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 10 [SIMPLE_EDGE] - | | |<-Map 12 [CONTAINS] - | | | Reduce Output Operator [RS_35] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_34] - | | | keys:_col0 (type: 
string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_30] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_79] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_29] - | | | alias:y - | | |<-Map 9 [CONTAINS] - | | Reduce Output Operator [RS_35] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_34] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_28] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Map Join Operator [MAPJOIN_84] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Reducer 6":"_col0 (type: string)","Map 8":"_col1 (type: string)"} + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 8 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_18] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_14] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_79] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_13] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_12] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_11] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 5 [SIMPLE_EDGE] + | | |<-Map 4 [CONTAINS] + | | | Reduce Output Operator [RS_10] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_9] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_3] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_77] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_2] + | | | alias:x + | | |<-Map 7 [CONTAINS] + | | Reduce Output Operator [RS_10] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_9] + | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_78] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_27] - | | alias:x - | |<-Map Join Operator [MAPJOIN_83] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 14":"_col0 (type: string)","Map 13":"_col0 (type: string)"} - | | 
outputColumnNames:["_col1","_col2"] - | |<-Map 14 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_45] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_41] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_81] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_40] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_39] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_80] - | predicate:key is not null (type: boolean) - | TableScan [TS_38] - | alias:y - |<-Map 5 [CONTAINS] + | | Select Operator [SEL_5] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_78] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_4] + | | alias:y + | |<-Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_76] + | predicate:key is not null (type: boolean) + | TableScan [TS_0] + | alias:y + |<-Map 9 [CONTAINS] Reduce Output Operator [RS_58] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1747,80 +1745,82 @@ Group By Operator [GBY_57] keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Select Operator [SEL_26] + Select Operator [SEL_53] outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_84] + Map Join Operator [MAPJOIN_87] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col2 (type: string)"} - | outputColumnNames:["_col0","_col2"] - |<-Reducer 3 [BROADCAST_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | keys:{"Reducer 12":"_col1 (type: string)","Map 9":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2"] + |<-Reducer 12 [BROADCAST_EDGE] + | Reduce Output Operator [RS_51] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_74] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 4 [CONTAINS] - | Reduce Output Operator [RS_8] - | 
key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Map Join Operator [MAPJOIN_86] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 14":"_col1 (type: string)","Reducer 12":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 14 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_45] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_41] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_83] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_40] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_39] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_38] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 11 [SIMPLE_EDGE] + | |<-Map 13 [CONTAINS] + | | Reduce Output Operator [RS_37] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_36] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_32] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_82] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_31] + | | alias:y + | |<-Map 10 [CONTAINS] + | Reduce Output Operator [RS_37] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_36] + | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_75] - | predicate:value is not null (type: boolean) - | TableScan [TS_2] - | alias:y - |<-Map Join Operator [MAPJOIN_82] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 5":"_col0 (type: string)","Map 8":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2"] - |<-Map 8 [BROADCAST_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_14] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | 
Filter Operator [FIL_77] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_13] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_12] - outputColumnNames:["_col0"] - Filter Operator [FIL_76] - predicate:key is not null (type: boolean) - TableScan [TS_11] - alias:y + | Select Operator [SEL_30] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_81] + | predicate:value is not null (type: boolean) + | TableScan [TS_29] + | alias:x + |<-Select Operator [SEL_28] + outputColumnNames:["_col0"] + Filter Operator [FIL_80] + predicate:key is not null (type: boolean) + TableScan [TS_27] + alias:y PREHOOK: query: explain SELECT x.key, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -1850,42 +1850,42 @@ Plan optimized by CBO. Vertex dependency in root stage -Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS) -Reducer 13 <- Union 12 (SIMPLE_EDGE), Union 14 (CONTAINS) -Map 30 <- Map 31 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 8 (CONTAINS) -Map 11 <- Union 12 (CONTAINS) -Reducer 24 <- Union 23 (SIMPLE_EDGE), Union 25 (CONTAINS) -Map 1 <- Union 2 (CONTAINS) -Map 20 <- Union 21 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 9 <- Union 8 (SIMPLE_EDGE) -Reducer 26 <- Union 25 (SIMPLE_EDGE) -Map 16 <- Union 12 (CONTAINS) -Map 29 <- Union 25 (CONTAINS) -Map 28 <- Union 23 (CONTAINS) -Reducer 15 <- Union 14 (SIMPLE_EDGE) -Map 18 <- Map 19 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Union 6 (CONTAINS) -Map 27 <- Union 21 (CONTAINS) -Map 17 <- Union 14 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Map 5 <- Map 10 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS) -Map 4 <- Union 2 (CONTAINS) +Map 12 <- Union 13 (CONTAINS) +Map 30 <- Union 26 (CONTAINS) +Reducer 23 <- Union 22 (SIMPLE_EDGE), Union 24 (CONTAINS) +Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) +Map 11 <- Reducer 16 (BROADCAST_EDGE), Union 2 (CONTAINS) +Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) +Map 21 <- Union 22 (CONTAINS) +Map 1 <- Reducer 8 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 20 <- Reducer 27 (BROADCAST_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Map 29 <- Union 24 (CONTAINS) +Reducer 8 <- Map 10 (BROADCAST_EDGE), Union 7 (SIMPLE_EDGE) +Reducer 27 <- Map 31 (BROADCAST_EDGE), Union 26 (SIMPLE_EDGE) +Map 28 <- Union 22 (CONTAINS) +Map 18 <- Union 15 (CONTAINS) +Reducer 16 <- Map 19 (BROADCAST_EDGE), Union 15 (SIMPLE_EDGE) +Map 17 <- Union 13 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 6 <- Union 7 (CONTAINS) +Map 9 <- Union 7 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 + Reducer 5 File Output Operator [FS_122] compressed:false - Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Group By Operator [GBY_120] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - |<-Union 8 [SIMPLE_EDGE] - |<-Map 30 [CONTAINS] + | Statistics:Num rows: 275 
Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Union 4 [SIMPLE_EDGE] + |<-Map 20 [CONTAINS] | Reduce Output Operator [RS_119] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1895,131 +1895,132 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_114] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_164] + | Map Join Operator [MAPJOIN_170] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 30":"_col3 (type: string)","Reducer 26":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Reducer 26 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_110] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | keys:{"Map 20":"_col0 (type: string)","Reducer 27":"_col1 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Reducer 27 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_112] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_98] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_97] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 25 [SIMPLE_EDGE] - | | |<-Reducer 24 [CONTAINS] - | | | Reduce Output Operator [RS_96] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_95] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Group By Operator [GBY_88] - | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | | outputColumnNames:["_col0","_col1"] - | | | |<-Union 23 [SIMPLE_EDGE] - | | | |<-Reducer 22 [CONTAINS] - | | | | Reduce Output Operator [RS_87] - | | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | | sort order:++ - | | | | Group By Operator [GBY_86] - | | | | keys:_col0 (type: string), _col1 (type: string) - | | | | outputColumnNames:["_col0","_col1"] - | | | | Group By Operator [GBY_79] - | | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | | | outputColumnNames:["_col0","_col1"] - | | | | |<-Union 21 [SIMPLE_EDGE] - | | | | |<-Map 20 [CONTAINS] - | | | | | Reduce Output Operator [RS_78] - | | | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | | | sort order:++ - | | | | | Group By Operator [GBY_77] - | | | | | keys:_col0 (type: string), _col1 (type: string) - | | | | | outputColumnNames:["_col0","_col1"] - | | | | | Select Operator [SEL_71] - | | | | | outputColumnNames:["_col0","_col1"] - | | | | | Filter Operator [FIL_153] - | | | | | predicate:value is not null (type: boolean) - | | | | | TableScan [TS_70] - | | | | | alias:x - | | | | |<-Map 27 [CONTAINS] - | | | | Reduce Output Operator [RS_78] - | | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | | 
Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | | sort order:++ - | | | | Group By Operator [GBY_77] - | | | | keys:_col0 (type: string), _col1 (type: string) - | | | | outputColumnNames:["_col0","_col1"] - | | | | Select Operator [SEL_73] - | | | | outputColumnNames:["_col0","_col1"] - | | | | Filter Operator [FIL_154] - | | | | predicate:value is not null (type: boolean) - | | | | TableScan [TS_72] - | | | | alias:y - | | | |<-Map 28 [CONTAINS] - | | | Reduce Output Operator [RS_87] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_86] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_82] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_155] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_81] - | | | alias:y - | | |<-Map 29 [CONTAINS] - | | Reduce Output Operator [RS_96] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_95] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_91] + | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | | Map Join Operator [MAPJOIN_169] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 31":"_col1 (type: string)","Reducer 27":"_col0 (type: string)"} + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 31 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_106] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_102] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_164] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_101] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_100] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_99] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 26 [SIMPLE_EDGE] + | | |<-Map 30 [CONTAINS] + | | | Reduce Output Operator [RS_98] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_97] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_93] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_163] + | | | predicate:value is not 
null (type: boolean) + | | | TableScan [TS_92] + | | | alias:y + | | |<-Reducer 25 [CONTAINS] + | | Reduce Output Operator [RS_98] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_97] + | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_156] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_90] - | | alias:y - | |<-Map Join Operator [MAPJOIN_161] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 30":"_col0 (type: string)","Map 31":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | |<-Map 31 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_106] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_102] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_158] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_101] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_100] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_157] - | predicate:key is not null (type: boolean) - | TableScan [TS_99] - | alias:y - |<-Reducer 7 [CONTAINS] + | | Group By Operator [GBY_90] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | |<-Union 24 [SIMPLE_EDGE] + | | |<-Reducer 23 [CONTAINS] + | | | Reduce Output Operator [RS_89] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_88] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Group By Operator [GBY_81] + | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | |<-Union 22 [SIMPLE_EDGE] + | | | |<-Map 21 [CONTAINS] + | | | | Reduce Output Operator [RS_80] + | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_79] + | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Select Operator [SEL_73] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Filter Operator [FIL_160] + | | | | predicate:value is not null (type: boolean) + | | | | TableScan [TS_72] + | | | | alias:x + | | | |<-Map 28 [CONTAINS] + | | | Reduce Output Operator [RS_80] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_79] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_75] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator 
[FIL_161] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_74] + | | | alias:y + | | |<-Map 29 [CONTAINS] + | | Reduce Output Operator [RS_89] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_88] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_84] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_162] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_83] + | | alias:y + | |<-Select Operator [SEL_71] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_159] + | predicate:key is not null (type: boolean) + | TableScan [TS_70] + | alias:y + |<-Reducer 3 [CONTAINS] Reduce Output Operator [RS_119] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2030,8 +2031,8 @@ Group By Operator [GBY_68] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - |<-Union 6 [SIMPLE_EDGE] - |<-Map 18 [CONTAINS] + |<-Union 2 [SIMPLE_EDGE] + |<-Map 11 [CONTAINS] | Reduce Output Operator [RS_67] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2041,105 +2042,106 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_62] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_163] + | Map Join Operator [MAPJOIN_168] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Reducer 15":"_col0 (type: string)","Map 18":"_col3 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Reducer 15 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_58] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | keys:{"Map 11":"_col0 (type: string)","Reducer 16":"_col1 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Reducer 16 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_60] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_46] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_45] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 14 [SIMPLE_EDGE] - | | |<-Reducer 13 [CONTAINS] - | | | Reduce Output Operator [RS_44] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_43] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Group By Operator [GBY_36] - | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | | outputColumnNames:["_col0","_col1"] - | | | |<-Union 12 [SIMPLE_EDGE] - | | | |<-Map 11 [CONTAINS] - | | | | Reduce Output Operator [RS_35] - | | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | | Map-reduce partition columns:_col0 (type: string), _col1 
(type: string) - | | | | sort order:++ - | | | | Group By Operator [GBY_34] - | | | | keys:_col0 (type: string), _col1 (type: string) - | | | | outputColumnNames:["_col0","_col1"] - | | | | Select Operator [SEL_28] - | | | | outputColumnNames:["_col0","_col1"] - | | | | Filter Operator [FIL_148] - | | | | predicate:value is not null (type: boolean) - | | | | TableScan [TS_27] - | | | | alias:x - | | | |<-Map 16 [CONTAINS] - | | | Reduce Output Operator [RS_35] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_34] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_30] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_149] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_29] - | | | alias:y - | | |<-Map 17 [CONTAINS] - | | Reduce Output Operator [RS_44] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_43] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_39] + | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | Map Join Operator [MAPJOIN_167] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 19":"_col1 (type: string)","Reducer 16":"_col0 (type: string)"} + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 19 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_54] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_50] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_158] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_49] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_48] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_47] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 15 [SIMPLE_EDGE] + | | |<-Reducer 14 [CONTAINS] + | | | Reduce Output Operator [RS_46] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_45] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Group By Operator [GBY_38] + | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | |<-Union 13 [SIMPLE_EDGE] + | | | |<-Map 12 
[CONTAINS] + | | | | Reduce Output Operator [RS_37] + | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_36] + | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Select Operator [SEL_30] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Filter Operator [FIL_155] + | | | | predicate:value is not null (type: boolean) + | | | | TableScan [TS_29] + | | | | alias:x + | | | |<-Map 17 [CONTAINS] + | | | Reduce Output Operator [RS_37] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_36] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_32] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_156] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_31] + | | | alias:y + | | |<-Map 18 [CONTAINS] + | | Reduce Output Operator [RS_46] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_45] + | | keys:_col0 (type: string), _col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_150] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_38] - | | alias:y - | |<-Map Join Operator [MAPJOIN_160] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 19":"_col0 (type: string)","Map 18":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | |<-Map 19 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_54] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_50] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_152] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_49] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_48] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_151] - | predicate:key is not null (type: boolean) - | TableScan [TS_47] - | alias:y - |<-Map 5 [CONTAINS] + | | Select Operator [SEL_41] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_157] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_40] + | | alias:y + | |<-Select Operator [SEL_28] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_154] + | predicate:key is not null (type: boolean) + | TableScan [TS_27] + | alias:y + |<-Map 1 [CONTAINS] Reduce Output Operator [RS_67] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2149,78 +2151,79 @@ outputColumnNames:["_col0","_col1"] Select Operator [SEL_26] outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_162] + 
Map Join Operator [MAPJOIN_166] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col3 (type: string)"} - | outputColumnNames:["_col2","_col3"] - |<-Reducer 3 [BROADCAST_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | keys:{"Map 1":"_col0 (type: string)","Reducer 8":"_col1 (type: string)"} + | outputColumnNames:["_col1","_col3"] + |<-Reducer 8 [BROADCAST_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_144] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 4 [CONTAINS] - | Reduce Output Operator [RS_8] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | Map Join Operator [MAPJOIN_165] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 10":"_col1 (type: string)","Reducer 8":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 10 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_14] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_153] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_13] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_12] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_11] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column 
stats: NONE + | |<-Union 7 [SIMPLE_EDGE] + | |<-Map 6 [CONTAINS] + | | Reduce Output Operator [RS_10] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_9] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_3] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_151] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_2] + | | alias:x + | |<-Map 9 [CONTAINS] + | Reduce Output Operator [RS_10] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_9] + | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_145] - | predicate:value is not null (type: boolean) - | TableScan [TS_2] - | alias:y - |<-Map Join Operator [MAPJOIN_159] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 10":"_col0 (type: string)","Map 5":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3"] - |<-Map 10 [BROADCAST_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_14] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_147] - | predicate:(key is not null and value is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_13] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_12] - outputColumnNames:["_col0","_col1"] - Filter Operator [FIL_146] - predicate:key is not null (type: boolean) - TableScan [TS_11] - alias:y + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_152] + | predicate:value is not null (type: boolean) + | TableScan [TS_4] + | alias:y + |<-Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_150] + predicate:key is not null (type: boolean) + TableScan [TS_0] + alias:y PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -2782,309 +2785,352 @@ Plan optimized by CBO. 
Vertex dependency in root stage -Map 12 <- Union 13 (CONTAINS) -Map 14 <- Union 13 (CONTAINS) -Map 21 <- Map 20 (BROADCAST_EDGE) -Map 1 <- Union 2 (CONTAINS) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 13 (SIMPLE_EDGE), Union 4 (CONTAINS) -Map 19 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Map 16 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 15 <- Union 13 (CONTAINS) -Map 18 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 17 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Map 5 <- Union 2 (CONTAINS) -Map 6 <- Map 7 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 5 (SIMPLE_EDGE) +Map 12 <- Union 9 (CONTAINS) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 13 <- Union 9 (CONTAINS) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) +Map 4 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS) +Map 19 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 6 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS) +Map 16 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 8 <- Union 9 (CONTAINS) +Map 18 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 17 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Union 4 - |<-Reducer 10 [CONTAINS] - | File Output Operator [FS_77] + Union 3 + |<-Reducer 2 [CONTAINS] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_21] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_120] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_17] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_104] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Union 5 [SIMPLE_EDGE] + | |<-Map 4 [CONTAINS] + | | Reduce Output Operator [RS_19] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Map Join Operator [MAPJOIN_119] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 4":"_col0 (type: string)","Map 7":"_col1 (type: string)"} + | | | outputColumnNames:["_col1"] + | | |<-Map 7 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_13] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | 
| value expressions:_col0 (type: string) + | | | Select Operator [SEL_9] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_107] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_8] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | | Reduce Output Operator [RS_125] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Please refer to the previous Select Operator [SEL_9] + | | |<-Select Operator [SEL_3] + | | outputColumnNames:["_col0"] + | | Filter Operator [FIL_105] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_2] + | | alias:x + | |<-Map 6 [CONTAINS] + | Reduce Output Operator [RS_19] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Map Join Operator [MAPJOIN_119] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 7":"_col1 (type: string)","Map 6":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<- Please refer to the previous Map 7 [BROADCAST_EDGE] + | |<-Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_106] + | predicate:value is not null (type: boolean) + | TableScan [TS_4] + | alias:y + |<-Reducer 11 [CONTAINS] + | File Output Operator [FS_76] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} | Select Operator [SEL_45] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_118] + | Merge Join Operator [MERGEJOIN_122] | | condition map:[{"":"Inner Join 0 to 1"}] | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} - | | outputColumnNames:["_col0","_col3"] - | |<-Reducer 9 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_41] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) + | | outputColumnNames:["_col1","_col4"] + | |<-Map 15 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_43] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Merge Join Operator [MERGEJOIN_115] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | | outputColumnNames:["_col0","_col1","_col3"] - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 11 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_38] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_25] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 
2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_104] - | | | predicate:key is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_24] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 8 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_36] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_23] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_103] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_22] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Union 13 [SIMPLE_EDGE] - | |<-Map 12 [CONTAINS] - | | Reduce Output Operator [RS_43] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Select Operator [SEL_27] - | | outputColumnNames:["_col0"] - | | Filter Operator [FIL_105] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_26] - | | alias:x - | |<-Map 14 [CONTAINS] - | | Reduce Output Operator [RS_43] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Select Operator [SEL_29] - | | outputColumnNames:["_col0"] - | | Filter Operator [FIL_106] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_28] - | | alias:y - | |<-Map 15 [CONTAINS] - | Reduce Output Operator [RS_43] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Select Operator [SEL_33] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_107] - | predicate:value is not null (type: boolean) - | TableScan [TS_32] - | alias:y + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_34] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_112] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_33] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_41] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_121] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE + | |<-Map 14 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_38] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | 
sort order:+ + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_32] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_111] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_31] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Union 9 [SIMPLE_EDGE] + | |<-Map 12 [CONTAINS] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Select Operator [SEL_25] + | | outputColumnNames:["_col0"] + | | Filter Operator [FIL_109] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_24] + | | alias:y + | |<-Map 13 [CONTAINS] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Select Operator [SEL_29] + | | outputColumnNames:["_col0"] + | | Filter Operator [FIL_110] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_28] + | | alias:y + | |<-Map 8 [CONTAINS] + | Reduce Output Operator [RS_36] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Select Operator [SEL_23] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_108] + | predicate:value is not null (type: boolean) + | TableScan [TS_22] + | alias:x |<-Map 19 [CONTAINS] - | File Output Operator [FS_77] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] + | Select Operator [SEL_74] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] + | Map Join Operator [MAPJOIN_124] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 19":"_col0 (type: string)"} + | | keys:{"Map 21":"_col0 (type: string)","Map 19":"_col1 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<-Map 21 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_73] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) + | | Reduce Output Operator [RS_72] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Map Join Operator [MAPJOIN_116] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 21":"_col0 (type: string)","Map 20":"_col0 (type: string)"} - | | | outputColumnNames:["_col0","_col1","_col3"] - | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 20 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_65] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: 
string) - | | | Select Operator [SEL_61] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_112] - | | | predicate:(key is not null and value is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_60] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_63] - | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_63] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_118] + | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_113] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_62] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_122] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) + | | TableScan [TS_62] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | Reduce Output Operator [RS_131] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_116] - | | Reduce Output Operator [RS_123] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Please refer to the previous Select Operator [SEL_63] + | | Reduce Output Operator [RS_132] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_116] - | | Reduce Output Operator [RS_124] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Please refer to the previous Select Operator [SEL_63] + | | Reduce Output Operator [RS_133] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_116] - | |<-Select Operator [SEL_58] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_111] - | predicate:value is not null (type: boolean) - | TableScan [TS_57] - | alias:y + | | Statistics:Num rows: 13 
Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Please refer to the previous Select Operator [SEL_63] + | |<-Map Join Operator [MAPJOIN_123] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 20":"_col1 (type: string)","Map 19":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<-Map 20 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_67] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_61] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_117] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_60] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | Reduce Output Operator [RS_127] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_61] + | | Reduce Output Operator [RS_128] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_61] + | | Reduce Output Operator [RS_129] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_61] + | |<-Select Operator [SEL_58] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_116] + | predicate:value is not null (type: boolean) + | TableScan [TS_57] + | alias:y |<-Map 16 [CONTAINS] - | File Output Operator [FS_77] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] + | Select Operator [SEL_74] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] + | Map Join Operator [MAPJOIN_124] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 16":"_col0 (type: string)"} + | | keys:{"Map 21":"_col0 (type: string)","Map 16":"_col1 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Select Operator [SEL_49] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_108] - | predicate:value is not null (type: boolean) - | TableScan [TS_48] - | alias:x + | |<-Map Join Operator [MAPJOIN_123] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 20":"_col1 (type: string)","Map 16":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<- Please refer to the previous Map 20 
[BROADCAST_EDGE] + | |<-Select Operator [SEL_49] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_113] + | predicate:value is not null (type: boolean) + | TableScan [TS_48] + | alias:x |<-Map 18 [CONTAINS] - | File Output Operator [FS_77] + | File Output Operator [FS_76] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] + | Select Operator [SEL_74] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] + | Map Join Operator [MAPJOIN_124] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 18":"_col0 (type: string)"} + | | keys:{"Map 21":"_col0 (type: string)","Map 18":"_col1 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Select Operator [SEL_55] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_110] - | predicate:value is not null (type: boolean) - | TableScan [TS_54] - | alias:y + | |<-Map Join Operator [MAPJOIN_123] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 20":"_col1 (type: string)","Map 18":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | |<- Please refer to the previous Map 20 [BROADCAST_EDGE] + | |<-Select Operator [SEL_55] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_115] + | predicate:value is not null (type: boolean) + | TableScan [TS_54] + | alias:y |<-Map 17 [CONTAINS] - | File Output Operator [FS_77] - | compressed:false - | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_75] - | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_119] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 17":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col4"] - | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Select Operator [SEL_51] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_109] - | predicate:value is not null (type: boolean) - | TableScan [TS_50] - | alias:y - |<-Reducer 3 [CONTAINS] - File Output Operator [FS_77] + File Output Operator [FS_76] compressed:false table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select Operator [SEL_21] + Select Operator [SEL_74] outputColumnNames:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_117] + Map Join Operator [MAPJOIN_124] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col2","_col3"] - |<-Map 6 [SIMPLE_EDGE] - | Reduce Output Operator [RS_19] - | key expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) - | sort order:+ - | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string), _col2 (type: string) - | Map Join Operator [MAPJOIN_114] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 7":"_col0 (type: string)","Map 6":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num 
rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Map 7 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_13] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_9] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_102] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_8] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_7] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_101] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_6] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Union 2 [SIMPLE_EDGE] - |<-Map 1 [CONTAINS] - | Reduce Output Operator [RS_17] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Select Operator [SEL_1] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_99] - | predicate:value is not null (type: boolean) - | TableScan [TS_0] - | alias:x - |<-Map 5 [CONTAINS] - Reduce Output Operator [RS_17] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Select Operator [SEL_3] - outputColumnNames:["_col0"] - Filter Operator [FIL_100] - predicate:value is not null (type: boolean) - TableScan [TS_2] - alias:y + | keys:{"Map 21":"_col0 (type: string)","Map 17":"_col1 (type: string)"} + | outputColumnNames:["_col1","_col4"] + |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + |<-Map Join Operator [MAPJOIN_123] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 20":"_col1 (type: string)","Map 17":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + |<- Please refer to the previous Map 20 [BROADCAST_EDGE] + |<-Select Operator [SEL_51] + outputColumnNames:["_col0"] + Filter Operator [FIL_114] + predicate:value is not null (type: boolean) + TableScan [TS_50] + alias:y PREHOOK: query: explain SELECT x.key, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -3114,56 +3160,54 @@ Plan optimized by CBO. 
Vertex dependency in root stage +Map 12 <- Union 13 (CONTAINS) Map 30 <- Union 24 (CONTAINS) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) Map 23 <- Union 24 (CONTAINS) Map 32 <- Union 28 (CONTAINS) Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) -Map 22 <- Union 19 (CONTAINS) Map 31 <- Union 26 (CONTAINS) -Map 21 <- Union 17 (CONTAINS) -Map 34 <- Map 33 (BROADCAST_EDGE) -Map 1 <- Union 2 (CONTAINS) -Reducer 20 <- Union 19 (SIMPLE_EDGE) -Map 10 <- Map 11 (BROADCAST_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Map 16 <- Union 17 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) +Map 20 <- Union 15 (CONTAINS) +Map 10 <- Union 8 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 19 <- Union 13 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 9 <- Map 11 (BROADCAST_EDGE), Union 8 (SIMPLE_EDGE) +Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS) -Reducer 29 <- Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Map 9 <- Union 2 (CONTAINS) +Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 29 <- Map 33 (BROADCAST_EDGE), Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 16 <- Union 15 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 7 <- Union 8 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_121] + Reducer 6 + File Output Operator [FS_120] compressed:false - Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Group By Operator [GBY_119] + Group By Operator [GBY_118] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE - |<-Union 7 [SIMPLE_EDGE] - |<-Reducer 6 [CONTAINS] - | Reduce Output Operator [RS_118] + | Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 4 [CONTAINS] + | Reduce Output Operator [RS_117] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) | sort order:++ - | Group By Operator [GBY_117] + | Group By Operator [GBY_116] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] | Group By Operator [GBY_67] | | keys:KEY._col0 (type: string), KEY._col1 (type: string) | | outputColumnNames:["_col0","_col1"] - | |<-Union 5 [SIMPLE_EDGE] - | |<-Reducer 14 [CONTAINS] + | |<-Union 3 [SIMPLE_EDGE] + | |<-Reducer 18 [CONTAINS] | | Reduce Output Operator [RS_66] | | key expressions:_col0 (type: string), _col1 (type: string) | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -3173,123 +3217,122 @@ | | 
outputColumnNames:["_col0","_col1"] | | Select Operator [SEL_61] | | outputColumnNames:["_col0","_col1"] - | | Merge Join Operator [MERGEJOIN_162] + | | Merge Join Operator [MERGEJOIN_166] | | | condition map:[{"":"Inner Join 0 to 1"}] | | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} - | | | outputColumnNames:["_col0","_col3"] - | | |<-Reducer 13 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_57] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) + | | | outputColumnNames:["_col1","_col4"] + | | |<-Map 22 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_59] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) | | | sort order:+ - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string), _col3 (type: string) - | | | Merge Join Operator [MERGEJOIN_159] - | | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | | | outputColumnNames:["_col0","_col1","_col3"] - | | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | | |<-Map 12 [SIMPLE_EDGE] - | | | | Reduce Output Operator [RS_52] - | | | | key expressions:_col0 (type: string) - | | | | Map-reduce partition columns:_col0 (type: string) - | | | | sort order:+ - | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | | value expressions:_col1 (type: string) - | | | | Select Operator [SEL_28] - | | | | outputColumnNames:["_col0","_col1"] - | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | | Filter Operator [FIL_147] - | | | | predicate:(key is not null and value is not null) (type: boolean) - | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | | TableScan [TS_27] - | | | | alias:y - | | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | | |<-Map 15 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_54] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_50] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_156] + | | | predicate:key is not null (type: boolean) | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_30] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_148] - | | | predicate:key is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_29] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Reducer 20 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_59] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | | TableScan [TS_49] + | | | alias:y + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + | | |<-Reducer 17 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_57] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_50] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_49] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 19 [SIMPLE_EDGE] - | | |<-Map 22 [CONTAINS] - | | | Reduce Output Operator [RS_48] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_47] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_43] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_151] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_42] - | | | alias:y - | | |<-Reducer 18 [CONTAINS] - | | Reduce Output Operator [RS_48] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_47] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Group By Operator [GBY_40] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | |<-Union 17 [SIMPLE_EDGE] - | | |<-Map 21 [CONTAINS] - | | | Reduce Output Operator [RS_39] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Group By Operator [GBY_38] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Select Operator [SEL_34] - | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_150] - | | | predicate:value is not null (type: boolean) - | | | TableScan [TS_33] - | | | alias:y - | | |<-Map 16 [CONTAINS] - | | Reduce Output Operator [RS_39] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_38] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_32] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_149] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_31] - | | alias:x - | |<-Reducer 4 [CONTAINS] + | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | Merge Join Operator [MERGEJOIN_165] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 21 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_54] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | 
Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string) + | | | Select Operator [SEL_48] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_155] + | | | predicate:(value is not null and key is not null) (type: boolean) + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_47] + | | | alias:y + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 16 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_52] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_46] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_45] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 15 [SIMPLE_EDGE] + | | |<-Reducer 14 [CONTAINS] + | | | Reduce Output Operator [RS_44] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_43] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Group By Operator [GBY_36] + | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | |<-Union 13 [SIMPLE_EDGE] + | | | |<-Map 12 [CONTAINS] + | | | | Reduce Output Operator [RS_35] + | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_34] + | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Select Operator [SEL_28] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Filter Operator [FIL_152] + | | | | predicate:value is not null (type: boolean) + | | | | TableScan [TS_27] + | | | | alias:x + | | | |<-Map 19 [CONTAINS] + | | | Reduce Output Operator [RS_35] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_34] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_30] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_153] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_29] + | | | alias:y + | | |<-Map 20 [CONTAINS] + | | Reduce Output Operator [RS_44] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_43] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_39] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_154] + | | predicate:value is not null 
(type: boolean) + | | TableScan [TS_38] + | | alias:y + | |<-Reducer 2 [CONTAINS] | Reduce Output Operator [RS_66] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -3299,226 +3342,225 @@ | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_26] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_161] + | Merge Join Operator [MERGEJOIN_164] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3"] - | |<-Map 10 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_24] - | | key expressions:_col3 (type: string) - | | Map-reduce partition columns:_col3 (type: string) + | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col3"] + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_22] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string) - | | Map Join Operator [MAPJOIN_158] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 11":"_col0 (type: string)","Map 10":"_col0 (type: string)"} - | | | outputColumnNames:["_col1","_col2","_col3"] - | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 11 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_18] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_14] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_146] - | | | predicate:(key is not null and value is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_13] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_12] - | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_148] + | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_145] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_11] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | | TableScan [TS_0] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col1 
(type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_143] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_0] - | | alias:x - | |<-Map 9 [CONTAINS] - | Reduce Output Operator [RS_8] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_3] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | Map Join Operator [MAPJOIN_163] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 11":"_col1 (type: string)","Reducer 9":"_col0 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 11 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_14] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_151] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_13] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_12] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_11] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 8 [SIMPLE_EDGE] + | |<-Map 10 [CONTAINS] + | | Reduce Output Operator [RS_10] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_9] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_5] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_150] + | | predicate:value is not null (type: boolean) + | | TableScan 
[TS_4] + | | alias:y + | |<-Map 7 [CONTAINS] + | Reduce Output Operator [RS_10] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_9] + | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_144] - | predicate:value is not null (type: boolean) - | TableScan [TS_2] - | alias:y + | Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_149] + | predicate:value is not null (type: boolean) + | TableScan [TS_2] + | alias:x |<-Reducer 29 [CONTAINS] - Reduce Output Operator [RS_118] + Reduce Output Operator [RS_117] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) sort order:++ - Group By Operator [GBY_117] + Group By Operator [GBY_116] keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Select Operator [SEL_113] + Select Operator [SEL_112] outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_163] + Map Join Operator [MAPJOIN_168] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 34":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"} + | keys:{"Map 34":"_col0 (type: string)","Reducer 29":"_col1 (type: string)"} | outputColumnNames:["_col1","_col4"] |<-Map 34 [BROADCAST_EDGE] - | Reduce Output Operator [RS_111] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) + | Reduce Output Operator [RS_110] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string), _col3 (type: string) - | Map Join Operator [MAPJOIN_160] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 34":"_col0 (type: string)","Map 33":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col1","_col3"] - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | |<-Map 33 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_103] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_99] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_156] - | | predicate:(key is not null and value is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_98] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_101] - | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_101] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_162] + | predicate:key is not null (type: boolean) | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_157] - | predicate:key is not null (type: boolean) - | 
Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_100] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_97] - outputColumnNames:["_col0"] - Group By Operator [GBY_96] - | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | outputColumnNames:["_col0","_col1"] - |<-Union 28 [SIMPLE_EDGE] - |<-Map 32 [CONTAINS] - | Reduce Output Operator [RS_95] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_94] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_90] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_155] - | predicate:value is not null (type: boolean) - | TableScan [TS_89] - | alias:y - |<-Reducer 27 [CONTAINS] - Reduce Output Operator [RS_95] - key expressions:_col0 (type: string), _col1 (type: string) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - sort order:++ - Group By Operator [GBY_94] - keys:_col0 (type: string), _col1 (type: string) - outputColumnNames:["_col0","_col1"] - Group By Operator [GBY_87] - | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | outputColumnNames:["_col0","_col1"] - |<-Union 26 [SIMPLE_EDGE] - |<-Reducer 25 [CONTAINS] - | Reduce Output Operator [RS_86] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_85] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Group By Operator [GBY_78] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | |<-Union 24 [SIMPLE_EDGE] - | |<-Map 30 [CONTAINS] - | | Reduce Output Operator [RS_77] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_76] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_72] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_153] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_71] - | | alias:y - | |<-Map 23 [CONTAINS] - | Reduce Output Operator [RS_77] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_76] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_70] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_152] - | predicate:value is not null (type: boolean) - | TableScan [TS_69] - | alias:x - |<-Map 31 [CONTAINS] - Reduce Output Operator [RS_86] - key expressions:_col0 (type: string), _col1 (type: string) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - sort order:++ - Group By Operator [GBY_85] - keys:_col0 (type: string), _col1 (type: string) - outputColumnNames:["_col0","_col1"] - Select Operator [SEL_81] + | TableScan [TS_100] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_167] + | condition map:[{"":"Inner Join 0 to 1"}] + | 
keys:{"Map 33":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + |<-Map 33 [BROADCAST_EDGE] + | Reduce Output Operator [RS_105] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_99] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_161] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_98] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_97] + outputColumnNames:["_col0"] + Group By Operator [GBY_96] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 28 [SIMPLE_EDGE] + |<-Map 32 [CONTAINS] + | Reduce Output Operator [RS_95] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_94] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_90] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_160] + | predicate:value is not null (type: boolean) + | TableScan [TS_89] + | alias:y + |<-Reducer 27 [CONTAINS] + Reduce Output Operator [RS_95] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_94] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Group By Operator [GBY_87] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 26 [SIMPLE_EDGE] + |<-Reducer 25 [CONTAINS] + | Reduce Output Operator [RS_86] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_85] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_78] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 24 [SIMPLE_EDGE] + | |<-Map 30 [CONTAINS] + | | Reduce Output Operator [RS_77] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_76] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_72] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_158] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_71] + | | alias:y + | |<-Map 23 [CONTAINS] + | Reduce Output Operator [RS_77] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_76] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_70] + | outputColumnNames:["_col0","_col1"] + | Filter 
Operator [FIL_157] + | predicate:value is not null (type: boolean) + | TableScan [TS_69] + | alias:x + |<-Map 31 [CONTAINS] + Reduce Output Operator [RS_86] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_85] + keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Filter Operator [FIL_154] - predicate:value is not null (type: boolean) - TableScan [TS_80] - alias:y + Select Operator [SEL_81] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_159] + predicate:value is not null (type: boolean) + TableScan [TS_80] + alias:y PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default Index: ql/src/test/results/clientpositive/union_remove_6_subq.q.out =================================================================== --- ql/src/test/results/clientpositive/union_remove_6_subq.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/union_remove_6_subq.q.out (working copy) @@ -559,14 +559,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: avg_window_0 arguments: _col1 name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _wcol0 (type: double) + expressions: _col0 (type: string), avg_window_0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/windowing_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/windowing_streaming.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/windowing_streaming.q.out (working copy) @@ -89,7 +89,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -97,7 +97,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _wcol0 (type: int) + expressions: _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -160,7 +160,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -168,10 +168,10 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 < 4) (type: boolean) + predicate: (rank_window_0 < 4) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _wcol0 (type: int) + expressions: _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -329,7 +329,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: 
GenericUDAFRankEvaluator @@ -337,10 +337,10 @@ isPivotResult: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 < 5) (type: boolean) + predicate: (rank_window_0 < 5) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: tinyint), _col5 (type: double), _wcol0 (type: int) + expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/groupby_resolution.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_resolution.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/groupby_resolution.q.out (working copy) @@ -690,7 +690,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -698,7 +698,7 @@ isPivotResult: true Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _wcol0 (type: int) + expressions: _col0 (type: string), _col1 (type: bigint), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/join32.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/join32.q.out (working copy) @@ -109,25 +109,71 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:y + $hdt$_0:z Fetch Operator limit: -1 + Partition Description: + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator 
limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:y + $hdt$_0:z TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -141,7 +187,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -150,31 +196,31 @@ HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -183,11 +229,11 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -356,7 +402,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator @@ -405,8 +451,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: 
Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 Index: ql/src/test/results/clientpositive/join_alt_syntax.q.out =================================================================== --- ql/src/test/results/clientpositive/join_alt_syntax.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/join_alt_syntax.q.out (working copy) @@ -359,13 +359,13 @@ and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -379,34 +379,36 @@ outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -418,9 +420,63 @@ Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort 
order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -432,21 +488,14 @@ sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -455,53 +504,6 @@ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -519,54 +521,56 @@ and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -578,9 +582,63 @@ Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 
1573 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -592,21 +650,14 @@ sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -615,53 +666,6 @@ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Index: ql/src/test/results/clientpositive/correlationoptimizer12.q.out =================================================================== --- ql/src/test/results/clientpositive/correlationoptimizer12.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/correlationoptimizer12.q.out (working copy) @@ -52,7 +52,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col1 name: count window function: GenericUDAFCountEvaluator @@ -62,7 +62,7 @@ predicate: _col0 
is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _wcol0 (type: bigint) + expressions: _col0 (type: string), count_window_0 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -137,7 +137,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col1 name: count window function: GenericUDAFCountEvaluator @@ -147,7 +147,7 @@ predicate: _col0 is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _wcol0 (type: bigint) + expressions: _col0 (type: string), count_window_0 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/cbo_simple_select.q.out =================================================================== --- ql/src/test/results/clientpositive/cbo_simple_select.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/cbo_simple_select.q.out (working copy) @@ -120,6 +120,16 @@ 1 1 25.0 NULL NULL NULL NULL NULL NULL +PREHOOK: query: select * from cbo_t1 where (((key=1) and (c_float=10)) and (c_int=20)) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +PREHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: select * from cbo_t1 where (((key=1) and (c_float=10)) and (c_int=20)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### PREHOOK: query: -- 2. 
Test Select + TS + FIL select * from cbo_t1 where cbo_t1.c_int >= 0 PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/quotedid_basic.q.out =================================================================== --- ql/src/test/results/clientpositive/quotedid_basic.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/quotedid_basic.q.out (working copy) @@ -211,7 +211,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -219,7 +219,7 @@ isPivotResult: true Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -315,7 +315,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -323,7 +323,7 @@ isPivotResult: true Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/vectorized_ptf.q.out =================================================================== --- ql/src/test/results/clientpositive/vectorized_ptf.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/vectorized_ptf.q.out (working copy) @@ -387,28 +387,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -844,7 +844,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -852,7 +852,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 
_col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1408,28 +1408,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1782,21 +1782,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1804,7 +1804,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2238,21 +2238,21 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator @@ -2260,7 +2260,7 @@ isPivotResult: true Statistics: Num rows: 13 Data size: 8021 
Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3303,7 +3303,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator @@ -3311,7 +3311,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3667,28 +3667,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4032,28 +4032,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4514,28 +4514,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4909,20 +4909,20 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: count_window_0 arguments: _col5 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol1 + alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -5503,33 +5503,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -5537,7 +5537,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), 
_col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6172,14 +6172,14 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col2 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~ Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6706,28 +6706,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -6841,15 +6841,15 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: sum_window_0 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint) - outputColumnNames: _col1, _col2, _col5, _wcol0 + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col1, _col2, _col5, sum_window_0 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -6860,7 +6860,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col5,_wcol0 + columns _col1,_col2,_col5,sum_window_0 columns.types string,string,int,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6880,7 +6880,7 @@ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: 
NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6891,7 +6891,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col5,_wcol0 + columns _col1,_col2,_col5,sum_window_0 columns.types string,string,int,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6900,7 +6900,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col5,_wcol0 + columns _col1,_col2,_col5,sum_window_0 columns.types string,string,int,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6927,35 +6927,35 @@ raw input shape: window functions: window function definition - alias: _wcol1 + alias: rank_window_1 arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol3 + alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol4 + alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -7553,28 +7553,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: 
string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8138,28 +8138,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -8637,28 +8637,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -9234,28 +9234,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), 
_wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -9793,28 +9793,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -10311,28 +10311,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/subquery_notin.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_notin.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/subquery_notin.q.out (working copy) @@ -346,7 +346,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -354,7 +354,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and (_col1 is null 
or _col2 is null)) (type: boolean) + predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -501,7 +501,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -509,7 +509,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -628,7 +628,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -636,7 +636,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -775,7 +775,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -783,7 +783,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int) @@ -950,7 +950,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -958,7 +958,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) @@ -1141,7 +1141,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1149,7 +1149,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) Index: ql/src/test/results/clientpositive/cbo_join.q.out =================================================================== --- ql/src/test/results/clientpositive/cbo_join.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/cbo_join.q.out (working copy) @@ -1,4 +1,5 @@ -PREHOOK: query: -- 4. Test Select + Join + TS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- 4. 
Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6,7 +7,8 @@ PREHOOK: Input: default@cbo_t2 PREHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: -- 4. Test Select + Join + TS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 @@ -122,126 +124,46 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 1 1 1 @@ -282,46 +204,6 @@ 1 1 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 1 1 1 @@ -522,6 +404,126 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -632,8 +634,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -730,6 +730,8 @@ 1 1 1 1 1 1 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -744,8 +746,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -847,6 +847,8 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -861,10 +863,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -966,6 +964,10 @@ NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL +NULL NULL +NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5334,8 +5336,6 @@ POSTHOOK: 
Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -5870,6 +5870,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6430,8 +6432,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -6966,6 +6966,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: -- 5. Test Select + Join + FIL + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0) PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/ptf_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/ptf_streaming.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/ptf_streaming.q.out (working copy) @@ -97,28 +97,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -313,7 +313,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lag_window_0 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -321,7 +321,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -637,7 +637,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1, _col5 name: rank window function: 
GenericUDAFRankEvaluator @@ -645,7 +645,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -814,28 +814,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1061,28 +1061,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1310,28 +1310,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ 
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1559,28 +1559,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1785,33 +1785,33 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~ window function definition - alias: _wcol3 + alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~ window function definition - alias: _wcol4 + alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator @@ -1819,7 +1819,7 @@ isPivotResult: true Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2074,28 +2074,28 @@ raw input shape: 
window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2364,28 +2364,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2628,28 +2628,28 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol1 + alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition - alias: _wcol2 + alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 
26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/join32_lessSize.q.out =================================================================== --- ql/src/test/results/clientpositive/join32_lessSize.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/join32_lessSize.q.out (working copy) @@ -130,7 +130,7 @@ GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -139,31 +139,31 @@ HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 0 Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -175,8 +175,8 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 - columns.types string,string + columns _col1,_col2,_col3 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -190,7 +190,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -200,14 +200,14 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -220,26 +220,23 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -247,59 +244,106 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:y + $hdt$_0:z Fetch Operator limit: -1 + Partition Description: + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + 
serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_0:y + $hdt$_0:z TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -318,11 +362,11 @@ keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col4, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -362,8 +406,8 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 - columns.types string,string + columns _col1,_col2,_col3 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -371,16 +415,19 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col3 - columns.types string,string + columns _col1,_col2,_col3 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -388,11 +435,13 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -402,24 +451,21 @@ input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' 
columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: #### A masked pattern was here #### @@ -470,8 +516,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -657,27 +703,28 @@ Stage-8 depends on stages: Stage-11 Stage-10 depends on stages: Stage-8 Stage-7 depends on stages: Stage-10 - Stage-9 depends on stages: Stage-7 - Stage-6 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 + $hdt$_1:$hdt$_3:x + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -686,12 +733,9 @@ HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 1 - - Stage: Stage-8 - Map Reduce - Map Operator Tree: + 1 _col1 (type: string) + Position of Big Table: 0 + $hdt$_1:$hdt$_3:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -704,32 +748,62 @@ expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: 
_col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + 1 _col1 (type: string) + outputColumnNames: _col1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col4 + Position of Big Table: 0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col3 - columns.types string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col4 + columns.types string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -737,7 +811,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -747,14 +821,14 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -767,106 +841,21 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 - Truncated Path -> Alias: - /src1 [$hdt$_1:$hdt$_2:$hdt$_3:x] - - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:w - Fetch Operator - 
limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:w - TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col4 - Position of Big Table: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + name: default.src + name: default.src #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col4 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col3 - columns.types string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col3 - columns.types string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -876,14 +865,14 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -896,22 +885,22 @@ columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 
5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: -#### A masked pattern was here #### + /src [$hdt$_1:$hdt$_1:w] - Stage: Stage-9 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:w @@ -937,7 +926,7 @@ 1 _col1 (type: string) Position of Big Table: 1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -950,17 +939,17 @@ 1 _col1 (type: string) outputColumnNames: _col1, _col3, _col6 Position of Big Table: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -993,7 +982,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10001 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: Index: ql/src/test/results/clientpositive/groupby_grouping_window.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out (working copy) @@ -106,7 +106,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col3 name: rank window function: GenericUDAFRankEvaluator @@ -114,7 +114,7 @@ isPivotResult: true Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _wcol0 (type: int) + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (working copy) @@ -231,7 +231,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -239,7 +239,7 @@ isPivotResult: true 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) @@ -399,7 +399,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -407,7 +407,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) @@ -842,7 +842,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -850,7 +850,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) + predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -997,7 +997,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col5 name: rank window function: GenericUDAFRankEvaluator @@ -1005,7 +1005,7 @@ isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_wcol0 <= 2) (type: boolean) + predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) Index: ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out =================================================================== --- ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out (working copy) @@ -808,32 +808,32 @@ alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s 
Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -843,10 +843,10 @@ 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col1 + outputColumnNames: _col2 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int) + expressions: _col2 (type: int) outputColumnNames: _col0 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator Index: ql/src/test/results/clientpositive/ctas_colname.q.out =================================================================== --- ql/src/test/results/clientpositive/ctas_colname.q.out (revision 1674187) +++ ql/src/test/results/clientpositive/ctas_colname.q.out (working copy) @@ -198,7 +198,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: rank_window_0 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator @@ -206,7 +206,7 @@ isPivotResult: true Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: int) + expressions: _col0 (type: string), _col1 (type: string), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -360,7 +360,7 @@ raw input shape: window functions: window function definition - alias: _wcol0 + alias: lead_window_0 arguments: _col0, 1 name: lead window function: GenericUDAFLeadEvaluator @@ -368,7 +368,7 @@ isPivotResult: true Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: string) + expressions: _col0 (type: string), _col1 (type: string), lead_window_0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Index: ql/src/test/queries/clientpositive/cbo_join.q =================================================================== --- ql/src/test/queries/clientpositive/cbo_join.q (revision 1674187) +++ ql/src/test/queries/clientpositive/cbo_join.q (working copy) @@ -4,6 +4,7 @@ set hive.stats.fetch.column.stats=true; set hive.auto.convert.join=false; +-- SORT_QUERY_RESULTS -- 4. 
Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key; select cbo_t1.key from cbo_t1 join cbo_t3; Index: ql/src/test/queries/clientpositive/cbo_simple_select.q =================================================================== --- ql/src/test/queries/clientpositive/cbo_simple_select.q (revision 1674187) +++ ql/src/test/queries/clientpositive/cbo_simple_select.q (working copy) @@ -10,6 +10,7 @@ select * from cbo_t1 as cbo_t2; select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1; +select * from cbo_t1 where (((key=1) and (c_float=10)) and (c_int=20)); -- 2. Test Select + TS + FIL select * from cbo_t1 where cbo_t1.c_int >= 0; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java (working copy) @@ -20,21 +20,62 @@ import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys; import com.google.common.collect.ImmutableList; public class HiveDefaultRelMetadataProvider { - private HiveDefaultRelMetadataProvider() { + + private final HiveConf hiveConf; + + + public HiveDefaultRelMetadataProvider(HiveConf hiveConf) { + this.hiveConf = hiveConf; } - public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList - .of(HiveRelMdDistinctRowCount.SOURCE, + public RelMetadataProvider getMetadataProvider() { + + // Create cost metadata provider + final HiveCostModel cm; + if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") + && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) { + cm = HiveOnTezCostModel.getCostModel(hiveConf); + } else { + cm = HiveDefaultCostModel.getCostModel(); + } + + // Get max split size for HiveRelMdParallelism + final Double maxSplitSize = (double) HiveConf.getLongVar( + this.hiveConf, + HiveConf.ConfVars.MAPREDMAXSPLITSIZE); + + // Return MD provider + return ChainedRelMetadataProvider.of(ImmutableList + .of( + HiveRelMdDistinctRowCount.SOURCE, + new HiveRelMdCost(cm).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, 
HiveRelMdRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, + HiveRelMdSize.SOURCE, + HiveRelMdMemory.SOURCE, + new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), + HiveRelMdDistribution.SOURCE, + HiveRelMdCollation.SOURCE, new DefaultRelMetadataProvider())); } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java (working copy) @@ -24,9 +24,9 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; public class HiveFilter extends Filter implements HiveRelNode { @@ -48,7 +48,7 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } /** Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java (working copy) @@ -31,7 +31,6 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.ImmutableBitSet; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; import com.google.common.collect.ImmutableList; @@ -39,6 +38,8 @@ public static final HiveAggRelFactory HIVE_AGGR_REL_FACTORY = new HiveAggRelFactory(); + + public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, boolean indicator, ImmutableBitSet groupSet, List groupSets, List aggCalls) throws InvalidRelException { @@ -66,7 +67,7 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } @Override @@ -75,6 +76,11 @@ .makeLiteral(true)); } + public boolean isBucketedInput() { + return RelMetadataQuery.distribution(this.getInput()).getKeys(). 
+ containsAll(groupSet.asList()); + } + private static class HiveAggRelFactory implements AggregateFactory { @Override Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (working copy) @@ -29,6 +29,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -42,7 +43,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; - import com.google.common.collect.ImmutableList; public class HiveProject extends Project implements HiveRelNode { @@ -172,7 +172,7 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistributionTraitDef; +import org.apache.calcite.rel.RelInput; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.SortExchange; + +public class HiveSortExchange extends SortExchange { + + private HiveSortExchange(RelOptCluster cluster, RelTraitSet traitSet, + RelNode input, RelDistribution distribution, RelCollation collation) { + super(cluster, traitSet, input, distribution, collation); + } + + public HiveSortExchange(RelInput input) { + super(input); + } + + /** + * Creates a HiveSortExchange. 
+ * + * @param input Input relational expression + * @param distribution Distribution specification + * @param collation Collation specification + */ + public static HiveSortExchange create(RelNode input, + RelDistribution distribution, RelCollation collation) { + RelOptCluster cluster = input.getCluster(); + distribution = RelDistributionTraitDef.INSTANCE.canonize(distribution); + collation = RelCollationTraitDef.INSTANCE.canonize(collation); + RelTraitSet traitSet = RelTraitSet.createEmpty().plus(distribution).plus(collation); + return new HiveSortExchange(cluster, traitSet, input, distribution, collation); + } + + @Override + public SortExchange copy(RelTraitSet traitSet, RelNode newInput, RelDistribution newDistribution, + RelCollation newCollation) { + return new HiveSortExchange(getCluster(), traitSet, newInput, + newDistribution, newCollation); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (working copy) @@ -17,21 +17,34 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; +import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; + /** * Relational expression representing a scan of a HiveDB collection. * @@ -42,6 +55,14 @@ */ public class HiveTableScan extends TableScan implements HiveRelNode { + private final RelDataType hiveTableScanRowType; + private final ImmutableList neededColIndxsFrmReloptHT; + private final String tblAlias; + + public String getTableAlias() { + return tblAlias; + } + /** * Creates a HiveTableScan. 
* @@ -54,10 +75,17 @@ * @param table * HiveDB table */ - public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, - RelDataType rowtype) { + public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, String alias) { + this(cluster, traitSet, table, alias, table.getRowType()); + } + + private HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, String alias, + RelDataType newRowtype) { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table); assert getConvention() == HiveRelNode.CONVENTION; + this.tblAlias = alias; + this.hiveTableScanRowType = newRowtype; + this.neededColIndxsFrmReloptHT = buildNeededColIndxsFrmReloptHT(table.getRowType(), newRowtype); } @Override @@ -66,9 +94,21 @@ return this; } + /** + * Copy TableScan operator with a new Row Schema. The new Row Schema can only + * be a subset of this TS schema. + * + * @param newRowtype + * @return + */ + public HiveTableScan copy(RelDataType newRowtype) { + return new HiveTableScan(getCluster(), getTraitSet(), ((RelOptHiveTable) table), this.tblAlias, + newRowtype); + } + @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } @Override @@ -89,4 +129,62 @@ public List getColStat(List projIndxLst) { return ((RelOptHiveTable) table).getColStat(projIndxLst); } -} \ No newline at end of file + + @Override + public RelNode project(ImmutableBitSet fieldsUsed, Set extraFields, + RelFactories.ProjectFactory projectFactory) { + + // 1. If the schema is the same then bail out + final int fieldCount = getRowType().getFieldCount(); + if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount)) && extraFields.isEmpty()) { + return this; + } + + // 2. Make sure there is no dynamic addition of virtual cols + if (extraFields != null && !extraFields.isEmpty()) { + throw new RuntimeException("Hive TS does not support adding virtual columns dynamically"); + } + + // 3. Create new TS schema that is a subset of original + final List fields = getRowType().getFieldList(); + List fieldTypes = new LinkedList(); + List fieldNames = new LinkedList(); + List exprList = new ArrayList(); + RexBuilder rexBuilder = getCluster().getRexBuilder(); + for (int i : fieldsUsed) { + RelDataTypeField field = fields.get(i); + fieldTypes.add(field.getType()); + fieldNames.add(field.getName()); + exprList.add(rexBuilder.makeInputRef(this, i)); + } + + // 4. Build new TS + HiveTableScan newHT = copy(getCluster().getTypeFactory().createStructType(fieldTypes, + fieldNames)); + + // 5. 
Add Proj on top of TS + return projectFactory.createProject(newHT, exprList, new ArrayList(fieldNames)); + } + + public List getNeededColIndxsFrmReloptHT() { + return neededColIndxsFrmReloptHT; + } + + public RelDataType getPrunedRowType() { + return hiveTableScanRowType; + } + + private static ImmutableList buildNeededColIndxsFrmReloptHT(RelDataType htRowtype, + RelDataType scanRowType) { + Builder neededColIndxsFrmReloptHTBldr = new ImmutableList.Builder(); + Map colNameToPosInReloptHT = HiveCalciteUtil.getRowColNameIndxMap(htRowtype + .getFieldList()); + List colNamesInScanRowType = scanRowType.getFieldNames(); + + for (int i = 0; i < colNamesInScanRowType.size(); i++) { + neededColIndxsFrmReloptHTBldr.add(colNameToPosInReloptHT.get(colNamesInScanRowType.get(i))); + } + + return neededColIndxsFrmReloptHTBldr.build(); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java (working copy) @@ -25,9 +25,9 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; public class HiveLimit extends SingleRel implements HiveRelNode { private final RexNode offset; @@ -52,6 +52,6 @@ @Override public RelOptCost computeSelfCost(RelOptPlanner planner) { - return HiveCost.FACTORY.makeZeroCost(); + return RelMetadataQuery.getNonCumulativeCost(this); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java (working copy) @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.Set; import org.apache.calcite.plan.RelOptCluster; @@ -25,7 +27,11 @@ import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.RelFactories.JoinFactory; @@ -33,38 +39,38 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel.JoinAlgorithm; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel.DefaultJoinAlgorithm; +import com.google.common.collect.ImmutableList; + //TODO: Should we convert MultiJoin to be a child of HiveJoin public class HiveJoin extends Join implements HiveRelNode { - // NOTE: COMMON_JOIN & SMB_JOIN are Sort Merge Join (in case of COMMON_JOIN - // each parallel computation handles multiple splits where as in case of SMB - // each parallel computation handles one bucket). MAP_JOIN and BUCKET_JOIN is - // hash joins where MAP_JOIN keeps the whole data set of non streaming tables - // in memory where as BUCKET_JOIN keeps only the b - public enum JoinAlgorithm { - NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN - } + public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); + public enum MapJoinStreamingRelation { NONE, LEFT_RELATION, RIGHT_RELATION } - public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); - private final boolean leftSemiJoin; - private final JoinAlgorithm joinAlgorithm; - //This will be used once we do Join Algorithm selection - @SuppressWarnings("unused") - private final MapJoinStreamingRelation mapJoinStreamingSide = MapJoinStreamingRelation.NONE; + private final JoinPredicateInfo joinPredInfo; + private JoinAlgorithm joinAlgorithm; + private RelOptCost joinCost; + public static HiveJoin getJoin(RelOptCluster cluster, RelNode left, RelNode right, RexNode condition, JoinRelType joinType, boolean leftSemiJoin) { try { Set variablesStopped = Collections.emptySet(); - return new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped, - JoinAlgorithm.NONE, null, leftSemiJoin); + HiveJoin join = new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped, + DefaultJoinAlgorithm.INSTANCE, leftSemiJoin); + return join; } catch (InvalidRelException e) { throw new RuntimeException(e); } @@ -72,10 +78,10 @@ protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right, RexNode condition, JoinRelType joinType, Set variablesStopped, - JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin) - throws InvalidRelException { + JoinAlgorithm joinAlgo, boolean leftSemiJoin) throws InvalidRelException { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType, variablesStopped); + this.joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(this); this.joinAlgorithm = joinAlgo; this.leftSemiJoin = leftSemiJoin; } @@ -90,7 +96,7 @@ try { Set variablesStopped = Collections.emptySet(); return new HiveJoin(getCluster(), traitSet, left, right, conditionExpr, joinType, - variablesStopped, JoinAlgorithm.NONE, null, leftSemiJoin); + variablesStopped, joinAlgorithm, leftSemiJoin); } catch (InvalidRelException e) { // Semantic error not possible. Must be a bug. Convert to // internal error. 
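// ---------------------------------------------------------------------------
// Illustrative sketch (editorial note, not part of the patch): this hunk and
// the cost-model classes added later in the patch (HiveCostModel,
// HiveOnTezCostModel) remove HiveJoin's hard-coded JoinAlgorithm enum and let a
// pluggable cost model pick the cheapest executable join algorithm. The
// self-contained Java below shows that selection idea only; every name in it is
// a simplified stand-in, not an actual Hive or Calcite API.
// ---------------------------------------------------------------------------
import java.util.List;

class JoinAlgorithmSelectionSketch {

  /** Stand-in for RelOptCost under the extended model: cpu + io decides, row count breaks ties. */
  record Cost(double rows, double cpu, double io) {
    boolean isLessThan(Cost other) {
      double a = cpu + io;
      double b = other.cpu + other.io;
      return a < b || (a == b && rows < other.rows);
    }
  }

  /** Stand-in for HiveCostModel.JoinAlgorithm. */
  interface JoinAlgorithm {
    String name();
    boolean isExecutable(double smallSideBytes, double memoryBudget);
    Cost cost(double leftRows, double rightRows);
  }

  /** Pick the cheapest executable algorithm; loosely mirrors HiveCostModel.getJoinCost. */
  static JoinAlgorithm select(List<JoinAlgorithm> candidates, double leftRows,
      double rightRows, double smallSideBytes, double memoryBudget) {
    JoinAlgorithm best = null;
    Cost bestCost = null;
    for (JoinAlgorithm algo : candidates) {
      if (!algo.isExecutable(smallSideBytes, memoryBudget)) {
        continue; // e.g. a map join whose small side does not fit in memory
      }
      Cost cost = algo.cost(leftRows, rightRows);
      if (bestCost == null || cost.isLessThan(bestCost)) {
        best = algo;
        bestCost = cost;
      }
    }
    return best; // null when no candidate algorithm is executable
  }
}
// ---------------------------------------------------------------------------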
@@ -98,10 +104,95 @@ } } - public JoinAlgorithm getJoinAlgorithm() { - return joinAlgorithm; + public JoinPredicateInfo getJoinPredicateInfo() { + return joinPredInfo; } + public void setJoinAlgorithm(JoinAlgorithm joinAlgorithm) { + this.joinAlgorithm = joinAlgorithm; + } + + public ImmutableList getCollation() { + return joinAlgorithm.getCollation(this); + } + + public RelDistribution getDistribution() { + return joinAlgorithm.getDistribution(this); + } + + public Double getMemory() { + return joinAlgorithm.getMemory(this); + } + + public Double getCumulativeMemoryWithinPhaseSplit() { + return joinAlgorithm.getCumulativeMemoryWithinPhaseSplit(this); + } + + public Boolean isPhaseTransition() { + return joinAlgorithm.isPhaseTransition(this); + } + + public Integer getSplitCount() { + return joinAlgorithm.getSplitCount(this); + } + + public MapJoinStreamingRelation getStreamingSide() { + Double leftInputSize = RelMetadataQuery.memory(left); + Double rightInputSize = RelMetadataQuery.memory(right); + if (leftInputSize == null && rightInputSize == null) { + return MapJoinStreamingRelation.NONE; + } else if (leftInputSize != null && + (rightInputSize == null || + (leftInputSize < rightInputSize))) { + return MapJoinStreamingRelation.RIGHT_RELATION; + } else if (rightInputSize != null && + (leftInputSize == null || + (rightInputSize <= leftInputSize))) { + return MapJoinStreamingRelation.LEFT_RELATION; + } + return MapJoinStreamingRelation.NONE; + } + + public RelNode getStreamingInput() { + MapJoinStreamingRelation mapJoinStreamingSide = getStreamingSide(); + RelNode smallInput; + if (mapJoinStreamingSide == MapJoinStreamingRelation.LEFT_RELATION) { + smallInput = this.getRight(); + } else if (mapJoinStreamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { + smallInput = this.getLeft(); + } else { + smallInput = null; + } + return smallInput; + } + + public ImmutableBitSet getSortedInputs() { + ImmutableBitSet.Builder sortedInputsBuilder = new ImmutableBitSet.Builder(); + JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo. + constructJoinPredicateInfo(this); + List joinKeysInChildren = new ArrayList(); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema())); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema())); + + for (int i=0; i fieldCollations) { + super(fieldCollations); + } + +} + + Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java (revision 0) @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.plan.Context; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; + + +public class HiveConfigContext implements Context { + private HiveAlgorithmsConf config; + + public HiveConfigContext(HiveAlgorithmsConf config) { + this.config = config; + } + + public T unwrap(Class clazz) { + if (clazz.isInstance(config)) { + return clazz.cast(config); + } + return null; + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java (working copy) @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.calcite.cost; - -import org.apache.calcite.plan.RelOptCost; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; - -// Use this once we have Join Algorithm selection -public class HiveCostUtil { - private static final double cpuCostInNanoSec = 1.0; - private static final double netCostInNanoSec = 150 * cpuCostInNanoSec; - private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec; - private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec; - private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec; - @SuppressWarnings("unused") -//Use this once we have Join Algorithm selection - private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec; - - public static RelOptCost computCardinalityBasedCost(HiveRelNode hr) { - return new HiveCost(hr.getRows(), 0, 0); - } - - public static HiveCost computeCost(HiveTableScan t) { - double cardinality = t.getRows(); - return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0); - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java (revision 0) @@ -0,0 +1,364 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +import com.google.common.collect.ImmutableList; + +public class HiveAlgorithmsUtil { + + private final double cpuCost; + private final double netCost; + private final double localFSWrite; + private final double localFSRead; + private final double hdfsWrite; + private final double hdfsRead; + + HiveAlgorithmsUtil(HiveConf conf) { + cpuCost = Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_CPU)); + netCost = cpuCost + * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_NET)); + localFSWrite = netCost + * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_LFS_WRITE)); + localFSRead = netCost + * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_LFS_READ)); + hdfsWrite = localFSWrite + * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_HDFS_WRITE)); + hdfsRead = localFSRead + * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_HDFS_READ)); + } + + public static RelOptCost computeCardinalityBasedCost(HiveRelNode hr) { + return new HiveCost(hr.getRows(), 0, 0); + } + + public HiveCost computeCost(HiveTableScan t) { + double cardinality = t.getRows(); + return new HiveCost(cardinality, 0, hdfsWrite * cardinality * 0); + } + + public double computeSortMergeCPUCost( + ImmutableList cardinalities, + ImmutableBitSet sorted) { + // Sort-merge join + double cpuCost = 0.0; + for (int i=0; i> relationInfos) { + // Sort-merge join + double ioCost = 0.0; + for (Pair relationInfo : relationInfos) { + ioCost += computeSortIOCost(relationInfo); + } + return ioCost; + } + + public double 
computeSortIOCost(Pair relationInfo) { + // Sort-merge join + double ioCost = 0.0; + double cardinality = relationInfo.left; + double averageTupleSize = relationInfo.right; + // Write cost + ioCost += cardinality * averageTupleSize * localFSWrite; + // Read cost + ioCost += cardinality * averageTupleSize * localFSRead; + // Net transfer cost + ioCost += cardinality * averageTupleSize * netCost; + return ioCost; + } + + public static double computeMapJoinCPUCost( + ImmutableList cardinalities, + ImmutableBitSet streaming) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i cardinalities, + ImmutableBitSet streaming) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i cardinalities) { + // Hash-join + double cpuCost = 0.0; + for (int i=0; i> relationInfos, + ImmutableBitSet streaming, int parallelism) { + // Hash-join + double ioCost = 0.0; + for (int i=0; i maxSize) { + return false; + } + return true; + } + return false; + } + + public static ImmutableList getJoinCollation(JoinPredicateInfo joinPredInfo, + MapJoinStreamingRelation streamingRelation) { + // Compute collations + ImmutableList.Builder collationListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder leftCollationListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder rightCollationListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. + getEquiJoinPredicateElements().get(i); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { + final RelFieldCollation leftFieldCollation = new RelFieldCollation(leftPos); + collationListBuilder.add(leftFieldCollation); + leftCollationListBuilder.add(leftFieldCollation); + } + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { + final RelFieldCollation rightFieldCollation = new RelFieldCollation(rightPos); + collationListBuilder.add(rightFieldCollation); + rightCollationListBuilder.add(rightFieldCollation); + } + } + + // Return join collations + final ImmutableList collation; + switch (streamingRelation) { + case LEFT_RELATION: + collation = ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(leftCollationListBuilder.build()))); + break; + case RIGHT_RELATION: + collation = ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(rightCollationListBuilder.build()))); + break; + default: + collation = ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(collationListBuilder.build()))); + break; + } + return collation; + } + + public static RelDistribution getJoinRedistribution(JoinPredicateInfo joinPredInfo) { + // Compute distribution + ImmutableList.Builder keysListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. 
+ getEquiJoinPredicateElements().get(i); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { + keysListBuilder.add(leftPos); + } + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { + keysListBuilder.add(rightPos); + } + } + return new HiveRelDistribution( + RelDistribution.Type.HASH_DISTRIBUTED, keysListBuilder.build()); + } + + public static RelDistribution getJoinDistribution(JoinPredicateInfo joinPredInfo, + MapJoinStreamingRelation streamingRelation) { + // Compute distribution + ImmutableList.Builder leftKeysListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder rightKeysListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. + getEquiJoinPredicateElements().get(i); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) { + leftKeysListBuilder.add(leftPos); + } + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) { + rightKeysListBuilder.add(rightPos); + } + } + + RelDistribution distribution = null; + // Keep buckets from the streaming relation + if (streamingRelation == MapJoinStreamingRelation.LEFT_RELATION) { + distribution = new HiveRelDistribution( + RelDistribution.Type.HASH_DISTRIBUTED, leftKeysListBuilder.build()); + } else if (streamingRelation == MapJoinStreamingRelation.RIGHT_RELATION) { + distribution = new HiveRelDistribution( + RelDistribution.Type.HASH_DISTRIBUTED, rightKeysListBuilder.build()); + } + + return distribution; + } + + public static Double getJoinMemory(HiveJoin join) { + return getJoinMemory(join, join.getStreamingSide()); + } + + public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation streamingSide) { + Double memory = 0.0; + if (streamingSide == MapJoinStreamingRelation.NONE || + streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { + // Left side + final Double leftAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double leftRowCount = RelMetadataQuery.getRowCount(join.getLeft()); + if (leftAvgRowSize == null || leftRowCount == null) { + return null; + } + memory += leftAvgRowSize * leftRowCount; + } + if (streamingSide == MapJoinStreamingRelation.NONE || + streamingSide == MapJoinStreamingRelation.LEFT_RELATION) { + // Right side + final Double rightAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + final Double rightRowCount = RelMetadataQuery.getRowCount(join.getRight()); + if (rightAvgRowSize == null || rightRowCount == null) { + return null; + } + memory += rightAvgRowSize * rightRowCount; + } + return memory; + } + + public static Integer getSplitCountWithRepartition(HiveJoin join) { + final Double maxSplitSize = join.getCluster().getPlanner().getContext(). 
+ unwrap(HiveAlgorithmsConf.class).getMaxSplitSize(); + // We repartition: new number of splits + final Double averageRowSize = RelMetadataQuery.getAverageRowSize(join); + final Double rowCount = RelMetadataQuery.getRowCount(join); + if (averageRowSize == null || rowCount == null) { + return null; + } + final Double totalSize = averageRowSize * rowCount; + final Double splitCount = totalSize / maxSplitSize; + return splitCount.intValue(); + } + + public static Integer getSplitCountWithoutRepartition(HiveJoin join) { + RelNode largeInput; + if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { + largeInput = join.getLeft(); + } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { + largeInput = join.getRight(); + } else { + return null; + } + return RelMetadataQuery.splitCount(largeInput); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java (working copy) @@ -90,22 +90,17 @@ return io; } - // TODO: If two cost is equal, could we do any better than comparing - // cardinality (may be some other heuristics to break the tie) public boolean isLe(RelOptCost other) { - return this == other || this.rowCount <= other.getRows(); - /* - * if (((this.dCpu + this.dIo) < (other.getCpu() + other.getIo())) || - * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo()) && this.dRows - * <= other.getRows())) { return true; } else { return false; } - */ + if ( (this.cpu + this.io < other.getCpu() + other.getIo()) || + ((this.cpu + this.io == other.getCpu() + other.getIo()) && + (this.rowCount <= other.getRows()))) { + return true; } + return false; + } public boolean isLt(RelOptCost other) { - return this.rowCount < other.getRows(); - /* - * return isLe(other) && !equals(other); - */ + return isLe(other) && !equals(other); } public double getRows() { @@ -113,21 +108,14 @@ } public boolean equals(RelOptCost other) { - return (this == other) || ((this.rowCount) == (other.getRows())); - - /* - * //TODO: should we consider cardinality as well? 
return (this == other) || - * ((this.dCpu + this.dIo) == (other.getCpu() + other.getIo())); - */ + return (this == other) || + ((this.cpu + this.io == other.getCpu() + other.getIo()) && + (this.rowCount == other.getRows())); } public boolean isEqWithEpsilon(RelOptCost other) { - return (this == other) || (Math.abs((this.rowCount) - (other.getRows())) < RelOptUtil.EPSILON); - // Turn this one once we do the Algorithm selection in CBO - /* - * return (this == other) || (Math.abs((this.dCpu + this.dIo) - - * (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON); - */ + return (this == other) || (Math.abs((this.cpu + this.io) - + (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON); } public RelOptCost minus(RelOptCost other) { @@ -135,8 +123,8 @@ return this; } - return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), this.io - - other.getIo()); + return new HiveCost(this.rowCount - other.getRows(), this.cpu - other.getCpu(), + this.io - other.getIo()); } public RelOptCost multiplyBy(double factor) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java (revision 0) @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import java.util.Set; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelDistribution; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +/** + * Cost model interface. 
+ */ +public abstract class HiveCostModel { + + private static final Log LOG = LogFactory.getLog(HiveCostModel.class); + + private final Set joinAlgorithms; + + + public HiveCostModel(Set joinAlgorithms) { + this.joinAlgorithms = joinAlgorithms; + } + + public abstract RelOptCost getDefaultCost(); + + public abstract RelOptCost getAggregateCost(HiveAggregate aggregate); + + public RelOptCost getJoinCost(HiveJoin join) { + // Select algorithm with min cost + JoinAlgorithm joinAlgorithm = null; + RelOptCost minJoinCost = null; + + if (LOG.isDebugEnabled()) { + LOG.debug("Join algorithm selection for:\n" + RelOptUtil.toString(join)); + } + + for (JoinAlgorithm possibleAlgorithm : this.joinAlgorithms) { + if (!possibleAlgorithm.isExecutable(join)) { + continue; + } + RelOptCost joinCost = possibleAlgorithm.getCost(join); + if (LOG.isDebugEnabled()) { + LOG.debug(possibleAlgorithm + " cost: " + joinCost); + } + if (minJoinCost == null || joinCost.isLt(minJoinCost) ) { + joinAlgorithm = possibleAlgorithm; + minJoinCost = joinCost; + } + } + + if (LOG.isDebugEnabled()) { + LOG.debug(joinAlgorithm + " selected"); + } + + join.setJoinAlgorithm(joinAlgorithm); + join.setJoinCost(minJoinCost); + + return minJoinCost; + } + + /** + * Interface for join algorithm. + */ + public interface JoinAlgorithm { + public String toString(); + public boolean isExecutable(HiveJoin join); + public RelOptCost getCost(HiveJoin join); + public ImmutableList getCollation(HiveJoin join); + public RelDistribution getDistribution(HiveJoin join); + public Double getMemory(HiveJoin join); + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join); + public Boolean isPhaseTransition(HiveJoin join); + public Integer getSplitCount(HiveJoin join); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java (revision 0) @@ -0,0 +1,598 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistribution.Type; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; + +/** + * Cost model for Tez execution engine. + */ +public class HiveOnTezCostModel extends HiveCostModel { + + private static HiveOnTezCostModel INSTANCE; + + private static HiveAlgorithmsUtil algoUtils; + + synchronized public static HiveOnTezCostModel getCostModel(HiveConf conf) { + if (INSTANCE == null) { + INSTANCE = new HiveOnTezCostModel(conf); + } + + return INSTANCE; + } + + private HiveOnTezCostModel(HiveConf conf) { + super(Sets.newHashSet( + TezCommonJoinAlgorithm.INSTANCE, + TezMapJoinAlgorithm.INSTANCE, + TezBucketJoinAlgorithm.INSTANCE, + TezSMBJoinAlgorithm.INSTANCE)); + + algoUtils = new HiveAlgorithmsUtil(conf); + } + + @Override + public RelOptCost getDefaultCost() { + return HiveCost.FACTORY.makeZeroCost(); + } + + @Override + public RelOptCost getAggregateCost(HiveAggregate aggregate) { + if (aggregate.isBucketedInput()) { + return HiveCost.FACTORY.makeZeroCost(); + } else { + // 1. Sum of input cardinalities + final Double rCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + if (rCount == null) { + return null; + } + // 2. CPU cost = sorting cost + final double cpuCost = algoUtils.computeSortCPUCost(rCount); + // 3. IO cost = cost of writing intermediary results to local FS + + // cost of reading from local FS for transferring to GBy + + // cost of transferring map outputs to GBy operator + final Double rAverageSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); + if (rAverageSize == null) { + return null; + } + final double ioCost = algoUtils.computeSortIOCost(new Pair(rCount,rAverageSize)); + // 4. Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + } + + /** + * COMMON_JOIN is Sort Merge Join. Each parallel computation handles multiple + * splits. + */ + public static class TezCommonJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezCommonJoinAlgorithm(); + private static final String ALGORITHM_NAME = "CommonJoin"; + + + @Override + public String toString() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + return true; + } + + @Override + public RelOptCost getCost(HiveJoin join) { + // 1. Sum of input cardinalities + final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + if (leftRCount == null || rightRCount == null) { + return null; + } + final double rCount = leftRCount + rightRCount; + // 2. 
CPU cost = sorting cost (for each relation) + + // total merge cost + ImmutableList cardinalities = new ImmutableList.Builder(). + add(leftRCount). + add(rightRCount). + build(); + final double cpuCost = algoUtils.computeSortMergeCPUCost(cardinalities, join.getSortedInputs()); + // 3. IO cost = cost of writing intermediary results to local FS + + // cost of reading from local FS for transferring to join + + // cost of transferring map outputs to Join operator + final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + if (leftRAverageSize == null || rightRAverageSize == null) { + return null; + } + ImmutableList> relationInfos = new ImmutableList.Builder>(). + add(new Pair(leftRCount,leftRAverageSize)). + add(new Pair(rightRCount,rightRAverageSize)). + build(); + final double ioCost = algoUtils.computeSortMergeIOCost(relationInfos); + // 4. Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + + @Override + public ImmutableList getCollation(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), + MapJoinStreamingRelation.NONE); + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinRedistribution(join.getJoinPredicateInfo()); + } + + @Override + public Double getMemory(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinMemory(join, MapJoinStreamingRelation.NONE); + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + final Double memoryWithinPhase = + RelMetadataQuery.cumulativeMemoryWithinPhase(join); + final Integer splitCount = RelMetadataQuery.splitCount(join); + if (memoryWithinPhase == null || splitCount == null) { + return null; + } + return memoryWithinPhase / splitCount; + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return true; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return HiveAlgorithmsUtil.getSplitCountWithRepartition(join); + } + } + + /** + * MAP_JOIN a hash join that keeps the whole data set of non streaming tables + * in memory. + */ + public static class TezMapJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezMapJoinAlgorithm(); + private static final String ALGORITHM_NAME = "MapJoin"; + + + @Override + public String toString() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + final Double maxMemory = join.getCluster().getPlanner().getContext(). + unwrap(HiveAlgorithmsConf.class).getMaxMemory(); + // Check streaming side + RelNode smallInput = join.getStreamingInput(); + if (smallInput == null) { + return false; + } + return HiveAlgorithmsUtil.isFittingIntoMemory(maxMemory, smallInput, 1); + } + + @Override + public RelOptCost getCost(HiveJoin join) { + // 1. Sum of input cardinalities + final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + if (leftRCount == null || rightRCount == null) { + return null; + } + final double rCount = leftRCount + rightRCount; + // 2. CPU cost = HashTable construction cost + + // join cost + ImmutableList cardinalities = new ImmutableList.Builder(). + add(leftRCount). + add(rightRCount). 
+ build(); + ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder(); + switch (join.getStreamingSide()) { + case LEFT_RELATION: + streamingBuilder.set(0); + break; + case RIGHT_RELATION: + streamingBuilder.set(1); + break; + default: + return null; + } + ImmutableBitSet streaming = streamingBuilder.build(); + final double cpuCost = HiveAlgorithmsUtil.computeMapJoinCPUCost(cardinalities, streaming); + // 3. IO cost = cost of transferring small tables to join node * + // degree of parallelism + final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + if (leftRAverageSize == null || rightRAverageSize == null) { + return null; + } + ImmutableList> relationInfos = new ImmutableList.Builder>(). + add(new Pair(leftRCount,leftRAverageSize)). + add(new Pair(rightRCount,rightRAverageSize)). + build(); + final int parallelism = RelMetadataQuery.splitCount(join) == null + ? 1 : RelMetadataQuery.splitCount(join); + final double ioCost = algoUtils.computeMapJoinIOCost(relationInfos, streaming, parallelism); + // 4. Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + + @Override + public ImmutableList getCollation(HiveJoin join) { + if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION + || join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) { + return null; + } + return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), + join.getStreamingSide()); + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION + || join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) { + return null; + } + return HiveAlgorithmsUtil.getJoinDistribution(join.getJoinPredicateInfo(), + join.getStreamingSide()); + } + + @Override + public Double getMemory(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinMemory(join); + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + // Check streaming side + RelNode inMemoryInput; + if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { + inMemoryInput = join.getRight(); + } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { + inMemoryInput = join.getLeft(); + } else { + return null; + } + // If simple map join, the whole relation goes in memory + return RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput); + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return false; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return HiveAlgorithmsUtil.getSplitCountWithoutRepartition(join); + } + } + + /** + * BUCKET_JOIN is a hash joins where one bucket of the non streaming tables + * is kept in memory at the time. + */ + public static class TezBucketJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezBucketJoinAlgorithm(); + private static final String ALGORITHM_NAME = "BucketJoin"; + + + @Override + public String toString() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + final Double maxMemory = join.getCluster().getPlanner().getContext(). 
+ unwrap(HiveAlgorithmsConf.class).getMaxMemory(); + // Check streaming side + RelNode smallInput = join.getStreamingInput(); + if (smallInput == null) { + return false; + } + // Get key columns + JoinPredicateInfo joinPredInfo = join.getJoinPredicateInfo(); + List joinKeysInChildren = new ArrayList(); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema())); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema())); + + // Requirements: for Bucket, bucketed by their keys on both sides and fitting in memory + // Obtain number of buckets + Integer buckets = RelMetadataQuery.splitCount(smallInput); + if (buckets == null) { + return false; + } + if (!HiveAlgorithmsUtil.isFittingIntoMemory(maxMemory, smallInput, buckets)) { + return false; + } + for (int i=0; i cardinalities = new ImmutableList.Builder(). + add(leftRCount). + add(rightRCount). + build(); + ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder(); + switch (join.getStreamingSide()) { + case LEFT_RELATION: + streamingBuilder.set(0); + break; + case RIGHT_RELATION: + streamingBuilder.set(1); + break; + default: + return null; + } + ImmutableBitSet streaming = streamingBuilder.build(); + final double cpuCost = algoUtils.computeBucketMapJoinCPUCost(cardinalities, streaming); + // 3. IO cost = cost of transferring small tables to join node * + // degree of parallelism + final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + if (leftRAverageSize == null || rightRAverageSize == null) { + return null; + } + ImmutableList> relationInfos = new ImmutableList.Builder>(). + add(new Pair(leftRCount,leftRAverageSize)). + add(new Pair(rightRCount,rightRAverageSize)). + build(); + final int parallelism = RelMetadataQuery.splitCount(join) == null + ? 1 : RelMetadataQuery.splitCount(join); + final double ioCost = algoUtils.computeBucketMapJoinIOCost(relationInfos, streaming, parallelism); + // 4. 
Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + + @Override + public ImmutableList getCollation(HiveJoin join) { + if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION + || join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) { + return null; + } + return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), + join.getStreamingSide()); + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinRedistribution(join.getJoinPredicateInfo()); + } + + @Override + public Double getMemory(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinMemory(join); + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + // Check streaming side + RelNode inMemoryInput; + if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { + inMemoryInput = join.getRight(); + } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { + inMemoryInput = join.getLeft(); + } else { + return null; + } + // If bucket map join, only a split goes in memory + final Double memoryInput = + RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput); + final Integer splitCount = RelMetadataQuery.splitCount(inMemoryInput); + if (memoryInput == null || splitCount == null) { + return null; + } + return memoryInput / splitCount; + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return false; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return HiveAlgorithmsUtil.getSplitCountWithoutRepartition(join); + } + } + + /** + * SMB_JOIN is a Sort Merge Join. Each parallel computation handles one bucket. + */ + public static class TezSMBJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new TezSMBJoinAlgorithm(); + private static final String ALGORITHM_NAME = "SMBJoin"; + + + @Override + public String toString() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + // Requirements: for SMB, sorted by their keys on both sides and bucketed. + // Get key columns + JoinPredicateInfo joinPredInfo = join.getJoinPredicateInfo(); + List joinKeysInChildren = new ArrayList(); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema())); + joinKeysInChildren.add( + ImmutableIntList.copyOf( + joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema())); + + for (int i=0; i cardinalities = new ImmutableList.Builder(). + add(leftRCount). + add(rightRCount). + build(); + ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder(); + switch (join.getStreamingSide()) { + case LEFT_RELATION: + streamingBuilder.set(0); + break; + case RIGHT_RELATION: + streamingBuilder.set(1); + break; + default: + return null; + } + ImmutableBitSet streaming = streamingBuilder.build(); + final double cpuCost = HiveAlgorithmsUtil.computeSMBMapJoinCPUCost(cardinalities); + // 3. IO cost = cost of transferring small tables to join node * + // degree of parallelism + final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + if (leftRAverageSize == null || rightRAverageSize == null) { + return null; + } + ImmutableList> relationInfos = new ImmutableList.Builder>(). + add(new Pair(leftRCount,leftRAverageSize)). + add(new Pair(rightRCount,rightRAverageSize)). 
+ build(); + final int parallelism = RelMetadataQuery.splitCount(join) == null + ? 1 : RelMetadataQuery.splitCount(join); + final double ioCost = algoUtils.computeSMBMapJoinIOCost(relationInfos, streaming, parallelism); + // 4. Result + return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); + } + + @Override + public ImmutableList getCollation(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), + MapJoinStreamingRelation.NONE); + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + return HiveAlgorithmsUtil.getJoinRedistribution(join.getJoinPredicateInfo()); + } + + @Override + public Double getMemory(HiveJoin join) { + return 0.0; + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + final Double memoryWithinPhase = + RelMetadataQuery.cumulativeMemoryWithinPhase(join); + final Integer splitCount = RelMetadataQuery.splitCount(join); + if (memoryWithinPhase == null || splitCount == null) { + return null; + } + return memoryWithinPhase / splitCount; + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return false; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return HiveAlgorithmsUtil.getSplitCountWithoutRepartition(join); + } + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java (revision 0) @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdPercentageOriginalRows; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +/** + * HiveRelMdCost supplies the implementation of cost model. 
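The class below wires whichever cost model is active into Calcite's metadata machinery. As a rough orientation sketch (the getCostModel/getMetadataProvider calls are the ones introduced by this patch; the runningOnTez flag and the surrounding wiring are hypothetical, the real engine-selection logic is not part of this hunk):

// Sketch only: pick a model for the current execution engine and expose it
// as the provider Calcite consults for NON_CUMULATIVE_COST.
HiveCostModel costModel = runningOnTez                  // hypothetical flag
    ? HiveOnTezCostModel.getCostModel(hiveConf)         // Tez: algorithm-aware costs
    : HiveDefaultCostModel.getCostModel();              // MR/Spark: cardinality only
RelMetadataProvider costProvider = new HiveRelMdCost(costModel).getMetadataProvider();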
+ */ +public class HiveRelMdCost { + + private final HiveCostModel hiveCostModel; + + public HiveRelMdCost(HiveCostModel hiveCostModel) { + this.hiveCostModel = hiveCostModel; + } + + public RelMetadataProvider getMetadataProvider() { + return ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource(this, + BuiltInMethod.NON_CUMULATIVE_COST.method), + RelMdPercentageOriginalRows.SOURCE)); + } + + public RelOptCost getNonCumulativeCost(HiveAggregate aggregate) { + return hiveCostModel.getAggregateCost(aggregate); + } + + public RelOptCost getNonCumulativeCost(HiveJoin join) { + return hiveCostModel.getJoinCost(join); + } + + // Default case + public RelOptCost getNonCumulativeCost(RelNode rel) { + return hiveCostModel.getDefaultCost(); + } + +} + +// End HiveRelMdCost.java Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java (revision 0) @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; + +/** + * Default implementation of the cost model. + * Currently used by MR and Spark execution engines. + */ +public class HiveDefaultCostModel extends HiveCostModel { + + private static HiveDefaultCostModel INSTANCE; + + synchronized public static HiveDefaultCostModel getCostModel() { + if (INSTANCE == null) { + INSTANCE = new HiveDefaultCostModel(); + } + + return INSTANCE; + } + + private HiveDefaultCostModel() { + super(Sets.newHashSet(DefaultJoinAlgorithm.INSTANCE)); + } + + @Override + public RelOptCost getDefaultCost() { + return HiveCost.FACTORY.makeZeroCost(); + } + + @Override + public RelOptCost getAggregateCost(HiveAggregate aggregate) { + return HiveCost.FACTORY.makeZeroCost(); + } + + + /** + * Default join algorithm. Cost is based on cardinality. 
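Put differently, this fallback algorithm prices a join as the sum of its input row counts and charges nothing for CPU or IO. A minimal self-contained sketch of that arithmetic, in plain Java, with a hypothetical SimpleCost type standing in for the (rowCount, cpu, io) triple built by HiveCost.FACTORY.makeCost:

/** Stand-in for the (rowCount, cpu, io) triple produced by HiveCost.FACTORY.makeCost. */
final class SimpleCost {
  final double rows, cpu, io;
  SimpleCost(double rows, double cpu, double io) { this.rows = rows; this.cpu = cpu; this.io = io; }
}

final class DefaultJoinCostSketch {
  /** Default model: charge only the cardinality of both inputs, nothing for CPU or IO. */
  static SimpleCost joinCost(double leftRowCount, double rightRowCount) {
    return new SimpleCost(leftRowCount + rightRowCount, 0.0, 0.0);
  }

  public static void main(String[] args) {
    SimpleCost c = joinCost(500, 25);
    System.out.println(c.rows + " rows, " + c.cpu + " cpu, " + c.io + " io"); // 525.0 rows, 0.0 cpu, 0.0 io
  }
}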
+ */ + public static class DefaultJoinAlgorithm implements JoinAlgorithm { + + public static final JoinAlgorithm INSTANCE = new DefaultJoinAlgorithm(); + private static final String ALGORITHM_NAME = "none"; + + + @Override + public String toString() { + return ALGORITHM_NAME; + } + + @Override + public boolean isExecutable(HiveJoin join) { + return true; + } + + @Override + public RelOptCost getCost(HiveJoin join) { + double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); + double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0); + } + + @Override + public ImmutableList getCollation(HiveJoin join) { + return null; + } + + @Override + public RelDistribution getDistribution(HiveJoin join) { + return null; + } + + @Override + public Double getMemory(HiveJoin join) { + return null; + } + + @Override + public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { + return null; + } + + @Override + public Boolean isPhaseTransition(HiveJoin join) { + return false; + } + + @Override + public Integer getSplitCount(HiveJoin join) { + return null; + } + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsConf.java (revision 0) @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.cost; + +public class HiveAlgorithmsConf { + + private Double maxSplitSize; + private Double maxMemory; + + + public HiveAlgorithmsConf(Double maxSplitSize, Double maxMemory) { + this.maxSplitSize = maxSplitSize; + this.maxMemory = maxMemory; + } + + public Double getMaxSplitSize() { + return maxSplitSize; + } + + public Double getMaxMemory() { + return maxMemory; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java (working copy) @@ -22,6 +22,7 @@ import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.volcano.VolcanoPlanner; import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext; /** * Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive. 
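The hunk below threads a HiveConfigContext into the Volcano planner; that is what lets the Tez join algorithms earlier in this patch unwrap HiveAlgorithmsConf at costing time and read their memory budget. A hedged sketch of both ends of that plumbing (createPlanner's signature and the unwrap pattern come from this patch; how the context object is populated, and the configContext/join variables, are assumed):

// Creation side: hand the config context to the Volcano planner.
RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(configContext);

// Consumption side (as in TezMapJoinAlgorithm.isExecutable above): recover the
// algorithms configuration from the planner context and read the memory budget.
HiveAlgorithmsConf algConf =
    join.getCluster().getPlanner().getContext().unwrap(HiveAlgorithmsConf.class);
Double maxMemory = algConf.getMaxMemory();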
@@ -34,12 +35,12 @@ private static final boolean ENABLE_COLLATION_TRAIT = true; /** Creates a HiveVolcanoPlanner. */ - public HiveVolcanoPlanner() { - super(HiveCost.FACTORY, null); + public HiveVolcanoPlanner(HiveConfigContext conf) { + super(HiveCost.FACTORY, conf); } - public static RelOptPlanner createPlanner() { - final VolcanoPlanner planner = new HiveVolcanoPlanner(); + public static RelOptPlanner createPlanner(HiveConfigContext conf) { + final VolcanoPlanner planner = new HiveVolcanoPlanner(conf); planner.addRelTraitDef(ConventionTraitDef.INSTANCE); if (ENABLE_COLLATION_TRAIT) { planner.addRelTraitDef(RelCollationTraitDef.INSTANCE); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java (working copy) @@ -54,11 +54,13 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -69,8 +71,8 @@ public class ASTConverter { private static final Log LOG = LogFactory.getLog(ASTConverter.class); - private RelNode root; - private HiveAST hiveAST; + private final RelNode root; + private final HiveAST hiveAST; private RelNode from; private Filter where; private Aggregate groupBy; @@ -213,7 +215,7 @@ private void convertLimitToASTNode(HiveSort limit) { if (limit != null) { - HiveSort hiveLimit = (HiveSort) limit; + HiveSort hiveLimit = limit; RexNode limitExpr = hiveLimit.getFetchExpr(); if (limitExpr != null) { Object val = ((RexLiteral) limitExpr).getValue2(); @@ -224,12 +226,12 @@ private void convertOBToASTNode(HiveSort order) { if (order != null) { - HiveSort hiveSort = (HiveSort) order; + HiveSort hiveSort = order; if (!hiveSort.getCollation().getFieldCollations().isEmpty()) { // 1 Add order by token ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); - schema = new Schema((HiveSort) hiveSort); + schema = new Schema(hiveSort); Map obRefToCallMap = hiveSort.getInputRefToCallMap(); RexNode obExpr; ASTNode astCol; @@ -370,7 +372,7 @@ static class RexVisitor extends RexVisitorImpl { private final Schema schema; - private boolean useTypeQualInLiteral; + private final boolean useTypeQualInLiteral; protected RexVisitor(Schema schema) { this(schema, false); @@ -567,7 +569,7 @@ private static final long serialVersionUID = 1L; Schema(TableScan scan) { - String tabName = ((RelOptHiveTable) scan.getTable()).getTableAlias(); + String tabName = ((HiveTableScan) scan).getTableAlias(); for (RelDataTypeField field : scan.getRowType().getFieldList()) { add(new ColumnInfo(tabName, field.getName())); } @@ -641,7 +643,13 @@ add(new 
ColumnInfo(null, projName)); } } + + public Schema(String tabAlias, List fieldList) { + for (RelDataTypeField field : fieldList) { + add(new ColumnInfo(tabAlias, field.getName())); } + } + } /* * represents Column information exposed by a QueryBlock. Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java (revision 0) @@ -0,0 +1,892 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistribution.Type; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.SortExchange; +import org.apache.calcite.rel.logical.LogicalExchange; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.apache.hadoop.hive.ql.parse.JoinCond; +import org.apache.hadoop.hive.ql.parse.JoinType; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; +import org.apache.hadoop.hive.ql.parse.PTFTranslator; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.UnparseTranslator; +import org.apache.hadoop.hive.ql.parse.WindowingComponentizer; +import org.apache.hadoop.hive.ql.parse.WindowingSpec; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PTFDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.plan.UnionDesc; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +public class HiveOpConverter { + + private static final Log LOG = LogFactory.getLog(HiveOpConverter.class); + + public static enum HIVEAGGOPMODE { + NO_SKEW_NO_MAP_SIDE_AGG, // Corresponds to SemAnalyzer genGroupByPlan1MR + SKEW_NO_MAP_SIDE_AGG, // Corresponds to SemAnalyzer genGroupByPlan2MR + NO_SKEW_MAP_SIDE_AGG, // Corresponds to SemAnalyzer + // genGroupByPlanMapAggrNoSkew + SKEW_MAP_SIDE_AGG // Corresponds to SemAnalyzer genGroupByPlanMapAggr2MR + }; + + // TODO: remove this after stashing only rqd pieces from opconverter + private final SemanticAnalyzer semanticAnalyzer; + private final HiveConf hiveConf; + private final UnparseTranslator unparseTranslator; + private final Map> topOps; + private final boolean strictMode; + private int reduceSinkTagGenerator; + + public HiveOpConverter(SemanticAnalyzer semanticAnalyzer, HiveConf hiveConf, + UnparseTranslator unparseTranslator, Map> topOps, + boolean strictMode) { + this.semanticAnalyzer = semanticAnalyzer; + this.hiveConf = hiveConf; + this.unparseTranslator = unparseTranslator; + this.topOps = topOps; + this.strictMode = strictMode; + this.reduceSinkTagGenerator = 0; + } + + static class OpAttr { + final String tabAlias; + ImmutableList inputs; + ImmutableMap vcolMap; + + OpAttr(String tabAlias, Map vcolMap, Operator... inputs) { + this.tabAlias = tabAlias; + this.vcolMap = ImmutableMap.copyOf(vcolMap); + this.inputs = ImmutableList.copyOf(inputs); + } + + private OpAttr clone(Operator... 
inputs) { + return new OpAttr(tabAlias, this.vcolMap, inputs); + } + } + + public Operator convert(RelNode root) throws SemanticException { + OpAttr opAf = dispatch(root); + return opAf.inputs.get(0); + } + + OpAttr dispatch(RelNode rn) throws SemanticException { + if (rn instanceof HiveTableScan) { + return visit((HiveTableScan) rn); + } else if (rn instanceof HiveProject) { + return visit((HiveProject) rn); + } else if (rn instanceof HiveJoin) { + return visit((HiveJoin) rn); + } else if (rn instanceof SemiJoin) { + SemiJoin sj = (SemiJoin) rn; + HiveJoin hj = HiveJoin.getJoin(sj.getCluster(), sj.getLeft(), sj.getRight(), + sj.getCondition(), sj.getJoinType(), true); + return visit(hj); + } else if (rn instanceof HiveFilter) { + return visit((HiveFilter) rn); + } else if (rn instanceof HiveSort) { + return visit((HiveSort) rn); + } else if (rn instanceof HiveUnion) { + return visit((HiveUnion) rn); + } else if (rn instanceof SortExchange) { + return visit((SortExchange) rn); + } else if (rn instanceof HiveAggregate) { + return visit((HiveAggregate) rn); + } + LOG.error(rn.getClass().getCanonicalName() + "operator translation not supported" + + " yet in return path."); + return null; + } + + /** + * TODO: 1. PPD needs to get pushed in to TS + * + * @param scanRel + * @return + */ + OpAttr visit(HiveTableScan scanRel) { + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + + " with row type: [" + scanRel.getRowType() + "]"); + } + + RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable(); + + // 1. Setup TableScan Desc + // 1.1 Build col details used by scan + ArrayList colInfos = new ArrayList(); + List virtualCols = new ArrayList(ht.getVirtualCols()); + Map hiveScanVColMap = new HashMap(); + List partColNames = new ArrayList(); + List neededColumnIDs = new ArrayList(); + List neededColumns = new ArrayList(); + + Map posToVColMap = HiveCalciteUtil.getVColsMap(virtualCols, + ht.getNoOfNonVirtualCols()); + Map posToPartColInfo = ht.getPartColInfoMap(); + Map posToNonPartColInfo = ht.getNonPartColInfoMap(); + List neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT(); + List scanColNames = scanRel.getRowType().getFieldNames(); + String tableAlias = scanRel.getTableAlias(); + + String colName; + ColumnInfo colInfo; + VirtualColumn vc; + Integer posInRHT; + + for (int i = 0; i < neededColIndxsFrmReloptHT.size(); i++) { + colName = scanColNames.get(i); + posInRHT = neededColIndxsFrmReloptHT.get(i); + if (posToVColMap.containsKey(posInRHT)) { + vc = posToVColMap.get(posInRHT); + virtualCols.add(vc); + colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden()); + hiveScanVColMap.put(i, vc); + } else if (posToPartColInfo.containsKey(posInRHT)) { + partColNames.add(colName); + colInfo = posToPartColInfo.get(posInRHT); + } else { + colInfo = posToNonPartColInfo.get(posInRHT); + } + neededColumnIDs.add(posInRHT); + neededColumns.add(colName); + colInfos.add(colInfo); + } + + // 1.2 Create TableScanDesc + TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD()); + + // 1.3. Set Partition cols in TSDesc + tsd.setPartColumns(partColNames); + + // 1.4. Set needed cols in TSDesc + tsd.setNeededColumnIDs(neededColumnIDs); + tsd.setNeededColumns(neededColumns); + + // 2. 
Setup TableScan + TableScanOperator ts = (TableScanOperator) OperatorFactory.get(tsd, new RowSchema(colInfos)); + + topOps.put(ht.getQBID(), ts); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]"); + } + + return new OpAttr(tableAlias, hiveScanVColMap, ts); + } + + OpAttr visit(HiveProject projectRel) throws SemanticException { + OpAttr inputOpAf = dispatch(projectRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + projectRel.getId() + ":" + + projectRel.getRelTypeName() + " with row type: [" + projectRel.getRowType() + "]"); + } + + WindowingSpec windowingSpec = new WindowingSpec(); + List exprCols = new ArrayList(); + for (int pos = 0; pos < projectRel.getChildExps().size(); pos++) { + ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias, projectRel + .getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(), + projectRel.getRowType(), false, projectRel.getCluster().getTypeFactory()); + exprCols.add(projectRel.getChildExps().get(pos).accept(converter)); + if (converter.getWindowFunctionSpec() != null) { + windowingSpec.addWindowFunction(converter.getWindowFunctionSpec()); + } + } + if (windowingSpec.getWindowExpressions() != null + && !windowingSpec.getWindowExpressions().isEmpty()) { + inputOpAf = genPTF(inputOpAf, windowingSpec); + } + // TODO: is this a safe assumption (name collision, external names...) + List exprNames = new ArrayList(projectRel.getRowType().getFieldNames()); + SelectDesc sd = new SelectDesc(exprCols, exprNames); + Pair, Map> colInfoVColPair = createColInfos( + projectRel.getChildExps(), exprCols, exprNames, inputOpAf); + SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(sd, new RowSchema( + colInfoVColPair.getKey()), inputOpAf.inputs.get(0)); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + selOp + " with row schema: [" + selOp.getSchema() + "]"); + } + + return new OpAttr(inputOpAf.tabAlias, colInfoVColPair.getValue(), selOp); + } + + OpAttr visit(HiveJoin joinRel) throws SemanticException { + // 1. Convert inputs + OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()]; + List> children = new ArrayList>(joinRel.getInputs().size()); + for (int i = 0; i < inputs.length; i++) { + inputs[i] = dispatch(joinRel.getInput(i)); + children.add(inputs[i].inputs.get(0)); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + + " with row type: [" + joinRel.getRowType() + "]"); + } + + // 2. Convert join condition + JoinPredicateInfo joinPredInfo = JoinPredicateInfo.constructJoinPredicateInfo(joinRel); + + // 3. Extract join keys from condition + ExprNodeDesc[][] joinKeys = extractJoinKeys(joinPredInfo, joinRel.getInputs()); + + // 4. Generate Join operator + JoinOperator joinOp = genJoin(joinRel, joinPredInfo, children, joinKeys); + + // 5. TODO: Extract condition for non-equi join elements (if any) and + // add it + + // 6. Virtual columns + Map vcolMap = new HashMap(); + vcolMap.putAll(inputs[0].vcolMap); + if (extractJoinType(joinRel) != JoinType.LEFTSEMI) { + int shift = inputs[0].inputs.get(0).getSchema().getSignature().size(); + for (int i = 1; i < inputs.length; i++) { + vcolMap.putAll(HiveCalciteUtil.shiftVColsMap(inputs[i].vcolMap, shift)); + shift += inputs[i].inputs.get(0).getSchema().getSignature().size(); + } + } + + // 8. 
Return result + return new OpAttr(null, vcolMap, joinOp); + } + + OpAttr visit(HiveAggregate aggRel) throws SemanticException { + OpAttr inputOpAf = dispatch(aggRel.getInput()); + return HiveGBOpConvUtil.translateGB(inputOpAf, aggRel, hiveConf); + } + + OpAttr visit(HiveSort sortRel) throws SemanticException { + OpAttr inputOpAf = dispatch(sortRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " with row type: [" + sortRel.getRowType() + "]"); + if (sortRel.getCollation() == RelCollations.EMPTY) { + LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " consists of limit"); + } else if (sortRel.fetch == null) { + LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " consists of sort"); + } else { + LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + + " consists of sort+limit"); + } + } + + Operator inputOp = inputOpAf.inputs.get(0); + Operator resultOp = inputOpAf.inputs.get(0); + // 1. If we need to sort tuples based on the value of some + // of their columns + if (sortRel.getCollation() != RelCollations.EMPTY) { + + // In strict mode, in the presence of order by, limit must be + // specified + if (strictMode && sortRel.fetch == null) { + throw new SemanticException(ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg()); + } + + // 1.a. Extract order for each column from collation + // Generate sortCols and order + List sortCols = new ArrayList(); + StringBuilder order = new StringBuilder(); + for (RelCollation collation : sortRel.getCollationList()) { + for (RelFieldCollation sortInfo : collation.getFieldCollations()) { + int sortColumnPos = sortInfo.getFieldIndex(); + ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature() + .get(sortColumnPos)); + ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(), + columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol()); + sortCols.add(sortColumn); + if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) { + order.append("-"); + } else { + order.append("+"); + } + } + } + // Use only 1 reducer for order by + int numReducers = 1; + + // 1.b. Generate reduce sink and project operator + resultOp = genReduceSinkAndBacktrackSelect(resultOp, + sortCols.toArray(new ExprNodeDesc[sortCols.size()]), -1, new ArrayList(), + order.toString(), numReducers, Operation.NOT_ACID, strictMode); + } + + // 2. If we need to generate limit + if (sortRel.fetch != null) { + int limit = RexLiteral.intValue(sortRel.fetch); + LimitDesc limitDesc = new LimitDesc(limit); + // TODO: Set 'last limit' global property + ArrayList cinfoLst = createColInfos(inputOp); + resultOp = OperatorFactory.getAndMakeChild(limitDesc, + new RowSchema(cinfoLst), resultOp); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]"); + } + } + + // 3. 
Return result + return inputOpAf.clone(resultOp); + } + + /** + * TODO: 1) isSamplingPred 2) sampleDesc 3) isSortedFilter + */ + OpAttr visit(HiveFilter filterRel) throws SemanticException { + OpAttr inputOpAf = dispatch(filterRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + filterRel.getId() + ":" + filterRel.getRelTypeName() + + " with row type: [" + filterRel.getRowType() + "]"); + } + + ExprNodeDesc filCondExpr = filterRel.getCondition().accept( + new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(), false, + filterRel.getCluster().getTypeFactory())); + FilterDesc filDesc = new FilterDesc(filCondExpr, false); + ArrayList cinfoLst = createColInfos(inputOpAf.inputs.get(0)); + FilterOperator filOp = (FilterOperator) OperatorFactory.getAndMakeChild(filDesc, new RowSchema( + cinfoLst), inputOpAf.inputs.get(0)); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + filOp + " with row schema: [" + filOp.getSchema() + "]"); + } + + return inputOpAf.clone(filOp); + } + + OpAttr visit(HiveUnion unionRel) throws SemanticException { + // 1. Convert inputs + OpAttr[] inputs = new OpAttr[unionRel.getInputs().size()]; + for (int i = 0; i < inputs.length; i++) { + inputs[i] = dispatch(unionRel.getInput(i)); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName() + + " with row type: [" + unionRel.getRowType() + "]"); + } + + // 2. Create a new union operator + UnionDesc unionDesc = new UnionDesc(); + unionDesc.setNumInputs(inputs.length); + ArrayList cinfoLst = createColInfos(inputs[0].inputs.get(0)); + Operator[] children = new Operator[inputs.length]; + for (int i = 0; i < children.length; i++) { + children[i] = inputs[i].inputs.get(0); + } + Operator unionOp = OperatorFactory.getAndMakeChild(unionDesc, + new RowSchema(cinfoLst), children); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]"); + } + + // 3. 
Return result + return inputs[0].clone(unionOp); + } + + OpAttr visit(SortExchange exchangeRel) throws SemanticException { + OpAttr inputOpAf = dispatch(exchangeRel.getInput()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]"); + } + + RelDistribution distribution = exchangeRel.getDistribution(); + if (distribution.getType() != Type.HASH_DISTRIBUTED) { + throw new SemanticException("Only hash distribution supported for LogicalExchange"); + } + ExprNodeDesc[] expressions = new ExprNodeDesc[distribution.getKeys().size()]; + for (int i = 0; i < distribution.getKeys().size(); i++) { + int key = distribution.getKeys().get(i); + ColumnInfo colInfo = inputOpAf.inputs.get(0).getSchema().getSignature().get(key); + ExprNodeDesc column = new ExprNodeColumnDesc(colInfo); + expressions[i] = column; + } + + ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), expressions, + reduceSinkTagGenerator++, -1, Operation.NOT_ACID, strictMode); + + return inputOpAf.clone(rsOp); + } + + private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticException { + Operator input = inputOpAf.inputs.get(0); + + wSpec.validateAndMakeEffective(); + WindowingComponentizer groups = new WindowingComponentizer(wSpec); + RowResolver rr = new RowResolver(); + for (ColumnInfo ci : input.getSchema().getSignature()) { + rr.put(ci.getTabAlias(), ci.getInternalName(), ci); + } + + while (groups.hasNext()) { + wSpec = groups.next(hiveConf, semanticAnalyzer, unparseTranslator, rr); + + // 1. Create RS and backtrack Select operator on top + ArrayList keyCols = new ArrayList(); + ArrayList partCols = new ArrayList(); + StringBuilder order = new StringBuilder(); + + for (PartitionExpression partCol : wSpec.getQueryPartitionSpec().getExpressions()) { + ExprNodeDesc partExpr = semanticAnalyzer.genExprNodeDesc(partCol.getExpression(), rr); + if (ExprNodeDescUtils.indexOf(partExpr, partCols) < 0) { + keyCols.add(partExpr); + partCols.add(partExpr); + order.append('+'); + } + } + + if (wSpec.getQueryOrderSpec() != null) { + for (OrderExpression orderCol : wSpec.getQueryOrderSpec().getExpressions()) { + ExprNodeDesc orderExpr = semanticAnalyzer.genExprNodeDesc(orderCol.getExpression(), rr); + char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-'; + int index = ExprNodeDescUtils.indexOf(orderExpr, keyCols); + if (index >= 0) { + order.setCharAt(index, orderChar); + continue; + } + keyCols.add(orderExpr); + order.append(orderChar); + } + } + + SelectOperator selectOp = genReduceSinkAndBacktrackSelect(input, + keyCols.toArray(new ExprNodeDesc[keyCols.size()]), reduceSinkTagGenerator++, partCols, + order.toString(), -1, Operation.NOT_ACID, strictMode); + + // 2. Finally create PTF + PTFTranslator translator = new PTFTranslator(); + PTFDesc ptfDesc = translator.translate(wSpec, semanticAnalyzer, hiveConf, rr, + unparseTranslator); + RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr(); + + Operator ptfOp = OperatorFactory.getAndMakeChild(ptfDesc, + new RowSchema(ptfOpRR.getColumnInfos()), selectOp); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + ptfOp + " with row schema: [" + ptfOp.getSchema() + "]"); + } + + // 3. 
Prepare for next iteration (if any) + rr = ptfOpRR; + input = ptfOp; + } + + return inputOpAf.clone(input); + } + + private ExprNodeDesc[][] extractJoinKeys(JoinPredicateInfo joinPredInfo, List inputs) { + ExprNodeDesc[][] joinKeys = new ExprNodeDesc[inputs.size()][]; + for (int i = 0; i < inputs.size(); i++) { + joinKeys[i] = new ExprNodeDesc[joinPredInfo.getEquiJoinPredicateElements().size()]; + for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(j); + RexNode key = joinLeafPredInfo.getJoinKeyExprs(j).get(0); + joinKeys[i][j] = convertToExprNode(key, inputs.get(j), null); + } + } + return joinKeys; + } + + private static SelectOperator genReduceSinkAndBacktrackSelect(Operator input, + ExprNodeDesc[] keys, int tag, ArrayList partitionCols, String order, + int numReducers, Operation acidOperation, boolean strictMode) throws SemanticException { + // 1. Generate RS operator + ReduceSinkOperator rsOp = genReduceSink(input, keys, tag, partitionCols, order, numReducers, + acidOperation, strictMode); + + // 2. Generate backtrack Select operator + Map descriptors = buildBacktrackFromReduceSink(rsOp, + input); + SelectDesc selectDesc = new SelectDesc(new ArrayList(descriptors.values()), + new ArrayList(descriptors.keySet())); + ArrayList cinfoLst = createColInfos(input); + SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, + new RowSchema(cinfoLst), rsOp); + selectOp.setColumnExprMap(descriptors); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]"); + } + + return selectOp; + } + + private static ReduceSinkOperator genReduceSink(Operator input, ExprNodeDesc[] keys, int tag, + int numReducers, Operation acidOperation, boolean strictMode) throws SemanticException { + return genReduceSink(input, keys, tag, new ArrayList(), "", numReducers, + acidOperation, strictMode); + } + + @SuppressWarnings({ "rawtypes", "unchecked" }) + private static ReduceSinkOperator genReduceSink(Operator input, ExprNodeDesc[] keys, int tag, + ArrayList partitionCols, String order, int numReducers, + Operation acidOperation, boolean strictMode) throws SemanticException { + Operator dummy = Operator.createDummy(); // dummy for backtracking + dummy.setParentOperators(Arrays.asList(input)); + + ArrayList reduceKeys = new ArrayList(); + ArrayList reduceKeysBack = new ArrayList(); + + // Compute join keys and store in reduceKeys + for (ExprNodeDesc key : keys) { + reduceKeys.add(key); + reduceKeysBack.add(ExprNodeDescUtils.backtrack(key, dummy, input)); + } + + // Walk over the input schema and copy in the output + ArrayList reduceValues = new ArrayList(); + ArrayList reduceValuesBack = new ArrayList(); + Map colExprMap = new HashMap(); + + List inputColumns = input.getSchema().getSignature(); + ArrayList outputColumns = new ArrayList(); + List outputColumnNames = new ArrayList(); + int[] index = new int[inputColumns.size()]; + for (int i = 0; i < inputColumns.size(); i++) { + ColumnInfo colInfo = inputColumns.get(i); + String outputColName = colInfo.getInternalName(); + ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo); + + // backtrack can be null when input is script operator + ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, input); + int kindex = exprBack == null ? 
-1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack); + if (kindex >= 0) { + ColumnInfo newColInfo = new ColumnInfo(colInfo); + newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex); + newColInfo.setAlias(outputColName); + newColInfo.setTabAlias(colInfo.getTabAlias()); + outputColumns.add(newColInfo); + index[i] = kindex; + continue; + } + int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack); + if (kindex >= 0) { + index[i] = -vindex - 1; + continue; + } + index[i] = -reduceValues.size() - 1; + + reduceValues.add(expr); + reduceValuesBack.add(exprBack); + + ColumnInfo newColInfo = new ColumnInfo(colInfo); + newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName); + newColInfo.setAlias(outputColName); + newColInfo.setTabAlias(colInfo.getTabAlias()); + + outputColumns.add(newColInfo); + outputColumnNames.add(outputColName); + } + dummy.setParentOperators(null); + + // Use only 1 reducer if no reduce keys + if (reduceKeys.size() == 0) { + numReducers = 1; + + // Cartesian product is not supported in strict mode + if (strictMode) { + throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg()); + } + } + + ReduceSinkDesc rsDesc; + if (order.isEmpty()) { + rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, + reduceKeys.size(), numReducers, acidOperation); + } else { + rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, + partitionCols, order, numReducers, acidOperation); + } + + ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, + new RowSchema(outputColumns), input); + + List keyColNames = rsDesc.getOutputKeyColumnNames(); + for (int i = 0; i < keyColNames.size(); i++) { + colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i)); + } + List valColNames = rsDesc.getOutputValueColumnNames(); + for (int i = 0; i < valColNames.size(); i++) { + colExprMap.put(Utilities.ReduceField.VALUE + "." 
+ valColNames.get(i), reduceValues.get(i)); + } + + rsOp.setValueIndex(index); + rsOp.setColumnExprMap(colExprMap); + rsOp.setInputAliases(input.getSchema().getColumnNames() + .toArray(new String[input.getSchema().getColumnNames().size()])); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + rsOp + " with row schema: [" + rsOp.getSchema() + "]"); + } + + return rsOp; + } + + private static JoinOperator genJoin(HiveJoin hiveJoin, JoinPredicateInfo joinPredInfo, + List> children, ExprNodeDesc[][] joinKeys) throws SemanticException { + + // Extract join type + JoinType joinType = extractJoinType(hiveJoin); + + // NOTE: Currently binary joins only + JoinCondDesc[] joinCondns = new JoinCondDesc[1]; + joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType)); + + ArrayList outputColumns = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(hiveJoin.getRowType() + .getFieldNames()); + Operator[] childOps = new Operator[children.size()]; + + Map reversedExprs = new HashMap(); + HashMap> exprMap = new HashMap>(); + Map colExprMap = new HashMap(); + HashMap> posToAliasMap = new HashMap>(); + + int outputPos = 0; + for (int pos = 0; pos < children.size(); pos++) { + ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos); + if (inputRS.getNumParent() != 1) { + throw new SemanticException("RS should have single parent"); + } + Operator parent = inputRS.getParentOperators().get(0); + ReduceSinkDesc rsDesc = inputRS.getConf(); + + int[] index = inputRS.getValueIndex(); + + Byte tag = (byte) rsDesc.getTag(); + + // Semijoin + if (joinType == JoinType.LEFTSEMI && pos != 0) { + exprMap.put(tag, new ArrayList()); + childOps[pos] = inputRS; + continue; + } + + List keyColNames = rsDesc.getOutputKeyColumnNames(); + List valColNames = rsDesc.getOutputValueColumnNames(); + + posToAliasMap.put(pos, new HashSet(inputRS.getSchema().getTableNames())); + + Map descriptors = buildBacktrackFromReduceSink(outputPos, + outputColumnNames, keyColNames, valColNames, index, parent); + + List parentColumns = parent.getSchema().getSignature(); + for (int i = 0; i < index.length; i++) { + ColumnInfo info = new ColumnInfo(parentColumns.get(i)); + info.setInternalName(outputColumnNames.get(outputPos)); + outputColumns.add(info); + reversedExprs.put(outputColumnNames.get(outputPos), tag); + outputPos++; + } + + exprMap.put(tag, new ArrayList(descriptors.values())); + colExprMap.putAll(descriptors); + childOps[pos] = inputRS; + } + + boolean noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER + && joinType != JoinType.RIGHTOUTER; + JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, joinKeys); + desc.setReversedExprs(reversedExprs); + + JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc, new RowSchema( + outputColumns), childOps); + joinOp.setColumnExprMap(colExprMap); + joinOp.setPosToAliasMap(posToAliasMap); + + // TODO: null safes? 
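A note on the int[] value index set by genReduceSink above and decoded by buildBacktrackFromReduceSink further down: a non-negative entry points into the ReduceSink key columns, while a negative entry encodes a value-column position as -(pos + 1). A standalone plain-Java sketch of the decoding (class and method names are hypothetical; the KEY./VALUE. mapping mirrors the code in this patch):

import java.util.List;

/** Sketch: how one entry of ReduceSinkOperator.getValueIndex() maps to a column reference. */
final class ReduceFieldDecodeSketch {
  static String decode(int indexEntry, List<String> keyColNames, List<String> valueColNames) {
    return indexEntry >= 0
        ? "KEY." + keyColNames.get(indexEntry)             // column travelled as a ReduceSink key
        : "VALUE." + valueColNames.get(-indexEntry - 1);   // column travelled as a value; -1 -> 0, -2 -> 1, ...
  }
}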
+ + if (LOG.isDebugEnabled()) { + LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]"); + } + + return joinOp; + } + + private static JoinType extractJoinType(HiveJoin join) { + // UNIQUE + if (join.isDistinct()) { + return JoinType.UNIQUE; + } + // SEMIJOIN + if (join.isLeftSemiJoin()) { + return JoinType.LEFTSEMI; + } + // OUTER AND INNER JOINS + JoinType resultJoinType; + switch (join.getJoinType()) { + case FULL: + resultJoinType = JoinType.FULLOUTER; + break; + case LEFT: + resultJoinType = JoinType.LEFTOUTER; + break; + case RIGHT: + resultJoinType = JoinType.RIGHTOUTER; + break; + default: + resultJoinType = JoinType.INNER; + break; + } + return resultJoinType; + } + + private static Map buildBacktrackFromReduceSink(ReduceSinkOperator rsOp, + Operator inputOp) { + return buildBacktrackFromReduceSink(0, inputOp.getSchema().getColumnNames(), rsOp.getConf() + .getOutputKeyColumnNames(), rsOp.getConf().getOutputValueColumnNames(), + rsOp.getValueIndex(), inputOp); + } + + private static Map buildBacktrackFromReduceSink(int initialPos, + List outputColumnNames, List keyColNames, List valueColNames, + int[] index, Operator inputOp) { + Map columnDescriptors = new LinkedHashMap(); + for (int i = 0; i < index.length; i++) { + ColumnInfo info = new ColumnInfo(inputOp.getSchema().getSignature().get(i)); + String field; + if (index[i] >= 0) { + field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]); + } else { + field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1); + } + ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), + info.getIsVirtualCol()); + columnDescriptors.put(outputColumnNames.get(initialPos + i), desc); + } + return columnDescriptors; + } + + private static ExprNodeDesc convertToExprNode(RexNode rn, RelNode inputRel, String tabAlias) { + return rn.accept(new ExprNodeConverter(tabAlias, inputRel.getRowType(), false, + inputRel.getCluster().getTypeFactory())); + } + + private static ArrayList createColInfos(Operator input) { + ArrayList cInfoLst = new ArrayList(); + for (ColumnInfo ci : input.getSchema().getSignature()) { + cInfoLst.add(new ColumnInfo(ci)); + } + return cInfoLst; + } + + private static Pair, Map> createColInfos( + List calciteExprs, List hiveExprs, List projNames, + OpAttr inpOpAf) { + if (hiveExprs.size() != projNames.size()) { + throw new RuntimeException("Column expressions list doesn't match Column Names list"); + } + + RexNode rexN; + ExprNodeDesc pe; + ArrayList colInfos = new ArrayList(); + VirtualColumn vc; + Map newVColMap = new HashMap(); + for (int i = 0; i < hiveExprs.size(); i++) { + pe = hiveExprs.get(i); + rexN = calciteExprs.get(i); + vc = null; + if (rexN instanceof RexInputRef) { + vc = inpOpAf.vcolMap.get(((RexInputRef) rexN).getIndex()); + if (vc != null) { + newVColMap.put(i, vc); + } + } + colInfos + .add(new ColumnInfo(projNames.get(i), pe.getTypeInfo(), inpOpAf.tabAlias, vc != null)); + } + + return new Pair, Map>(colInfos, newVColMap); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java (revision 0) @@ -0,0 +1,1237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.translator; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import com.google.common.collect.ImmutableList; + +/** + * TODO:
+ * 1. Change the output col/ExprNodeColumn names to external names.
+ * 2. Verify if we need to use the "KEY."/"VALUE." in RS cols; switch to + * external names if possible.
+ * 3. In ExprNode & in ColumnInfo the tableAlias/VirtualColumn is specified + * differently for different GB/RS in pipeline. Remove the different treatments. + * 3. VirtualColMap needs to be maintained + * + */ +public class HiveGBOpConvUtil { + private static enum HIVEGBPHYSICALMODE { + MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB, MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB, MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT, MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT, NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW + }; + + private static class UDAFAttrs { + private boolean isDistinctUDAF; + private String udafName; + private GenericUDAFEvaluator udafEvaluator; + private final ArrayList udafParams = new ArrayList(); + private List udafParamsIndxInGBInfoDistExprs = new ArrayList(); + }; + + private static class GBInfo { + private final List outputColNames = new ArrayList(); + + private final List gbKeyColNamesInInput = new ArrayList(); + private final List gbKeyTypes = new ArrayList(); + private final List gbKeys = new ArrayList(); + + private final List grpSets = new ArrayList(); + private boolean grpSetRqrAdditionalMRJob; + private boolean grpIdFunctionNeeded; + + private final List distExprNames = new ArrayList(); + private final List distExprTypes = new ArrayList(); + private final List distExprNodes = new ArrayList(); + private final List> distColIndices = new ArrayList>(); + + private final List deDupedNonDistIrefs = new ArrayList(); + + private final List udafAttrs = new ArrayList(); + private boolean containsDistinctAggr = false; + + float groupByMemoryUsage; + float memoryThreshold; + + private HIVEGBPHYSICALMODE gbPhysicalPipelineMode; + }; + + private static HIVEGBPHYSICALMODE getAggOPMode(HiveConf hc, GBInfo gbInfo) { + HIVEGBPHYSICALMODE gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB; + + if (hc.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) { + if (!hc.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { + if (!gbInfo.grpSetRqrAdditionalMRJob) { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB; + } else { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB; + } + } else { + if (gbInfo.containsDistinctAggr || !gbInfo.gbKeys.isEmpty()) { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT; + } else { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT; + } + } + } else { + if (!hc.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW; + } else { + gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_SKEW; + } + } + + return gbPhysicalPipelineMode; + } + + // For each of the GB op in the logical GB this should be called seperately; + // otherwise GBevaluator and expr nodes may get shared among multiple GB ops + private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf hc) throws SemanticException { + GBInfo gbInfo = new GBInfo(); + + // 0. Collect AggRel output col Names + gbInfo.outputColNames.addAll(aggRel.getRowType().getFieldNames()); + + // 1. 
Collect GB Keys + RelNode aggInputRel = aggRel.getInput(); + ExprNodeConverter exprConv = new ExprNodeConverter(inputOpAf.tabAlias, + aggInputRel.getRowType(), false, aggRel.getCluster().getTypeFactory()); + + ExprNodeDesc tmpExprNodeDesc; + for (int i : aggRel.getGroupSet()) { + RexInputRef iRef = new RexInputRef(i, aggInputRel.getRowType().getFieldList() + .get(i).getType()); + tmpExprNodeDesc = iRef.accept(exprConv); + gbInfo.gbKeys.add(tmpExprNodeDesc); + gbInfo.gbKeyColNamesInInput.add(aggInputRel.getRowType().getFieldNames().get(i)); + gbInfo.gbKeyTypes.add(tmpExprNodeDesc.getTypeInfo()); + } + + // 2. Collect Grouping Set info + if (aggRel.indicator) { + // 2.1 Translate Grouping set col bitset + ImmutableList lstGrpSet = aggRel.getGroupSets(); + int bitmap = 0; + for (ImmutableBitSet grpSet : lstGrpSet) { + bitmap = 0; + for (Integer bitIdx : grpSet.asList()) { + bitmap = SemanticAnalyzer.setBit(bitmap, bitIdx); + } + gbInfo.grpSets.add(bitmap); + } + Collections.sort(gbInfo.grpSets); + + // 2.2 Check if GRpSet require additional MR Job + gbInfo.grpSetRqrAdditionalMRJob = gbInfo.grpSets.size() > hc + .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY); + + // 2.3 Check if GROUPING_ID needs to be projected out + if (!aggRel.getAggCallList().isEmpty() + && (aggRel.getAggCallList().get(aggRel.getAggCallList().size() - 1).getAggregation() == HiveGroupingID.INSTANCE)) { + gbInfo.grpIdFunctionNeeded = true; + } + } + + // 3. Walk through UDAF & Collect Distinct Info + Set distinctRefs = new HashSet(); + Map distParamInRefsToOutputPos = new HashMap(); + for (AggregateCall aggCall : aggRel.getAggCallList()) { + if ((aggCall.getAggregation() == HiveGroupingID.INSTANCE) || !aggCall.isDistinct()) { + continue; + } + + List argLst = new ArrayList(aggCall.getArgList()); + List argNames = HiveCalciteUtil.getFieldNames(argLst, aggInputRel); + ExprNodeDesc distinctExpr; + for (int i = 0; i < argLst.size(); i++) { + if (!distinctRefs.contains(argLst.get(i))) { + distinctRefs.add(argLst.get(i)); + distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size()); + distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv); + gbInfo.distExprNodes.add(distinctExpr); + gbInfo.distExprNames.add(argNames.get(i)); + gbInfo.distExprTypes.add(distinctExpr.getTypeInfo()); + } + } + } + + // 4. 
Walk through UDAF & Collect UDAF Info + Set deDupedNonDistIrefsSet = new HashSet(); + for (AggregateCall aggCall : aggRel.getAggCallList()) { + if (aggCall.getAggregation() == HiveGroupingID.INSTANCE) { + continue; + } + + UDAFAttrs udafAttrs = new UDAFAttrs(); + udafAttrs.udafParams.addAll(HiveCalciteUtil.getExprNodes(aggCall.getArgList(), aggInputRel, + inputOpAf.tabAlias)); + udafAttrs.udafName = aggCall.getAggregation().getName(); + udafAttrs.isDistinctUDAF = aggCall.isDistinct(); + List argLst = new ArrayList(aggCall.getArgList()); + List distColIndicesOfUDAF = new ArrayList(); + List distUDAFParamsIndxInDistExprs = new ArrayList(); + for (int i = 0; i < argLst.size(); i++) { + // NOTE: distinct expr can not be part of of GB key (we assume plan + // gen would have prevented it) + if (udafAttrs.isDistinctUDAF) { + distColIndicesOfUDAF.add(distParamInRefsToOutputPos.get(argLst.get(i))); + distUDAFParamsIndxInDistExprs.add(distParamInRefsToOutputPos.get(argLst.get(i))); + } else { + // TODO: this seems wrong (following what Hive Regular does) + if (!distParamInRefsToOutputPos.containsKey(argLst.get(i)) + && !deDupedNonDistIrefsSet.contains(argLst.get(i))) { + deDupedNonDistIrefsSet.add(i); + gbInfo.deDupedNonDistIrefs.add(udafAttrs.udafParams.get(i)); + } + } + } + + if (udafAttrs.isDistinctUDAF) { + gbInfo.containsDistinctAggr = true; + + udafAttrs.udafParamsIndxInGBInfoDistExprs = distUDAFParamsIndxInDistExprs; + gbInfo.distColIndices.add(distColIndicesOfUDAF); + } + + // special handling for count, similar to PlanModifierForASTConv::replaceEmptyGroupAggr() + udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName, + new ArrayList(udafAttrs.udafParams), new ASTNode(), + udafAttrs.isDistinctUDAF, udafAttrs.udafParams.size() == 0 && + "count".equalsIgnoreCase(udafAttrs.udafName) ? true : false); + gbInfo.udafAttrs.add(udafAttrs); + } + + // 4. Gather GB Memory threshold + gbInfo.groupByMemoryUsage = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); + gbInfo.memoryThreshold = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + + // 5. Gather GB Physical pipeline (based on user config & Grping Sets size) + gbInfo.gbPhysicalPipelineMode = getAggOPMode(hc, gbInfo); + + return gbInfo; + } + + static OpAttr translateGB(OpAttr inputOpAf, HiveAggregate aggRel, HiveConf hc) + throws SemanticException { + OpAttr translatedGBOpAttr = null; + GBInfo gbInfo = getGBInfo(aggRel, inputOpAf, hc); + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB: + translatedGBOpAttr = genMapSideGBNoSkewNoAddMRJob(inputOpAf, aggRel, gbInfo); + break; + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + translatedGBOpAttr = genMapSideGBNoSkewAddMRJob(inputOpAf, aggRel, gbInfo); + break; + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + translatedGBOpAttr = genMapSideGBSkewGBKeysOrDistUDAFPresent(inputOpAf, aggRel, gbInfo); + break; + case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT: + translatedGBOpAttr = genMapSideGBSkewGBKeysAndDistUDAFNotPresent(inputOpAf, aggRel, gbInfo); + break; + case NO_MAP_SIDE_GB_NO_SKEW: + translatedGBOpAttr = genNoMapSideGBNoSkew(inputOpAf, aggRel, gbInfo); + break; + case NO_MAP_SIDE_GB_SKEW: + translatedGBOpAttr = genNoMapSideGBSkew(inputOpAf, aggRel, gbInfo); + break; + } + + return translatedGBOpAttr; + } + + /** + * GB-RS-GB1 + * + * Construct GB-RS-GB Pipe line. User has enabled Map Side GB, specified no + * skew and Grp Set is below the threshold. 
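+ * For example (illustrative only, not part of the original patch): with hive.map.aggr=true and
+ * hive.groupby.skewindata=false, a query such as "select key, count(value) from t group by key"
+ * is expected to be planned by this method as GBY(HASH) -> RS -> GBY(MERGEPARTIAL).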
+ * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBNoSkewNoAddMRJob(OpAttr inputOpAf, HiveAggregate aggRel, + GBInfo gbInfo) throws SemanticException { + OpAttr mapSideGB = null; + OpAttr mapSideRS = null; + OpAttr reduceSideGB = null; + + // 1. Insert MapSide GB + mapSideGB = genMapSideGB(inputOpAf, gbInfo); + + // 2. Insert MapSide RS + mapSideRS = genMapSideGBRS(mapSideGB, gbInfo); + + // 3. Insert ReduceSide GB + reduceSideGB = genReduceSideGB1(mapSideRS, gbInfo, false, false, GroupByDesc.Mode.MERGEPARTIAL); + + return reduceSideGB; + } + + /** + * GB-RS-GB1-RS-GB2 + */ + private static OpAttr genGBRSGBRSGBOpPipeLine(OpAttr inputOpAf, HiveAggregate aggRel, + GBInfo gbInfo) throws SemanticException { + OpAttr mapSideGB = null; + OpAttr mapSideRS = null; + OpAttr reduceSideGB1 = null; + OpAttr reduceSideRS = null; + OpAttr reduceSideGB2 = null; + + // 1. Insert MapSide GB + mapSideGB = genMapSideGB(inputOpAf, gbInfo); + + // 2. Insert MapSide RS + mapSideRS = genMapSideGBRS(mapSideGB, gbInfo); + + // 3. Insert ReduceSide GB1 + boolean computeGrpSet = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT) ? false : true; + reduceSideGB1 = genReduceSideGB1(mapSideRS, gbInfo, computeGrpSet, false, GroupByDesc.Mode.PARTIALS); + + // 4. Insert RS on reduce side with Reduce side GB as input + reduceSideRS = genReduceGBRS(reduceSideGB1, gbInfo); + + // 5. Insert ReduceSide GB2 + reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo); + + return reduceSideGB2; + } + + /** + * GB-RS-GB1-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBNoSkewAddMRJob(OpAttr inputOpAf, HiveAggregate aggRel, + GBInfo gbInfo) throws SemanticException { + // 1. Sanity check + if (gbInfo.containsDistinctAggr) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + gbInfo.grpSets.size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_DISTINCTS.getMsg(errorMsg)); + } + + // 2. Gen GB-RS-GB-RS-GB pipeline + return genGBRSGBRSGBOpPipeLine(inputOpAf, aggRel, gbInfo); + } + + /** + * GB-RS-GB1-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBSkewGBKeysOrDistUDAFPresent(OpAttr inputOpAf, + HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException { + // 1. Sanity check + if (gbInfo.grpSetRqrAdditionalMRJob) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + gbInfo.grpSets.size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg)); + } + + // 2. Gen GB-RS-GB-RS-GB pipeline + return genGBRSGBRSGBOpPipeLine(inputOpAf, aggRel, gbInfo); + } + + /** + * GB-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genMapSideGBSkewGBKeysAndDistUDAFNotPresent(OpAttr inputOpAf, + HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException { + OpAttr mapSideGB = null; + OpAttr mapSideRS = null; + OpAttr reduceSideGB2 = null; + + // 1. 
Sanity check + if (gbInfo.grpSetRqrAdditionalMRJob) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + gbInfo.grpSets.size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg)); + } + + // 1. Insert MapSide GB + mapSideGB = genMapSideGB(inputOpAf, gbInfo); + + // 2. Insert MapSide RS + mapSideRS = genMapSideGBRS(mapSideGB, gbInfo); + + // 3. Insert ReduceSide GB2 + reduceSideGB2 = genReduceSideGB2(mapSideRS, gbInfo); + + return reduceSideGB2; + } + + /** + * RS-Gb1 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genNoMapSideGBNoSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) + throws SemanticException { + OpAttr mapSideRS = null; + OpAttr reduceSideGB1NoMapGB = null; + + // 1. Insert MapSide RS + mapSideRS = genMapSideRS(inputOpAf, gbInfo); + + // 2. Insert ReduceSide GB + reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.COMPLETE); + + return reduceSideGB1NoMapGB; + } + + /** + * RS-GB1-RS-GB2 + * + * @param inputOpAf + * @param aggRel + * @param gbInfo + * @return + * @throws SemanticException + */ + private static OpAttr genNoMapSideGBSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) + throws SemanticException { + OpAttr mapSideRS = null; + OpAttr reduceSideGB1NoMapGB = null; + OpAttr reduceSideRS = null; + OpAttr reduceSideGB2 = null; + + // 1. Insert MapSide RS + mapSideRS = genMapSideRS(inputOpAf, gbInfo); + + // 2. Insert ReduceSide GB + reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.PARTIAL1); + + // 3. Insert RS on reduce side with Reduce side GB as input + reduceSideRS = genReduceGBRS(reduceSideGB1NoMapGB, gbInfo); + + // 4. 
Insert ReduceSide GB2 + reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo); + + return reduceSideGB2; + } + + private static int getParallelismForReduceSideRS(GBInfo gbInfo) { + int degreeOfParallelism = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + case NO_MAP_SIDE_GB_SKEW: + if (gbInfo.gbKeys.isEmpty()) { + degreeOfParallelism = 1; + } else { + degreeOfParallelism = -1; + } + break; + default: + throw new RuntimeException( + "Unable to determine Reducer Parallelism - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return degreeOfParallelism; + } + + private static int getParallelismForMapSideRS(GBInfo gbInfo) { + int degreeOfParallelism = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB: + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + case NO_MAP_SIDE_GB_NO_SKEW: + if (gbInfo.gbKeys.isEmpty()) { + degreeOfParallelism = 1; + } else { + degreeOfParallelism = -1; + } + break; + case NO_MAP_SIDE_GB_SKEW: + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + degreeOfParallelism = -1; + break; + case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT: + degreeOfParallelism = 1; + break; + default: + throw new RuntimeException( + "Unable to determine Reducer Parallelism - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return degreeOfParallelism; + } + + private static int getNumPartFieldsForReduceSideRS(GBInfo gbInfo) { + int numPartFields = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + numPartFields = gbInfo.gbKeys.size() + 1; + break; + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + case NO_MAP_SIDE_GB_SKEW: + numPartFields = gbInfo.gbKeys.size(); + break; + default: + throw new RuntimeException( + "Unable to determine Number of Partition Fields - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return numPartFields; + } + + private static int getNumPartFieldsForMapSideRS(GBInfo gbInfo) { + int numPartFields = 0; + + switch (gbInfo.gbPhysicalPipelineMode) { + case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB: + case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB: + case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT: + case NO_MAP_SIDE_GB_NO_SKEW: + numPartFields += gbInfo.gbKeys.size(); + break; + case NO_MAP_SIDE_GB_SKEW: + case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT: + if (gbInfo.containsDistinctAggr) { + numPartFields = Integer.MAX_VALUE; + } else { + numPartFields = -1; + } + break; + default: + throw new RuntimeException( + "Unable to determine Number of Partition Fields - Invalid Physical Mode: " + + gbInfo.gbPhysicalPipelineMode); + } + + return numPartFields; + } + + private static boolean inclGrpSetInReduceSide(GBInfo gbInfo) { + boolean inclGrpSet = false; + + if (gbInfo.grpSets.size() > 0 + && (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB || gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT)) { + inclGrpSet = true; + } + + return inclGrpSet; + } + + private static boolean inclGrpSetInMapSide(GBInfo gbInfo) { + boolean inclGrpSet = false; + + if (gbInfo.grpSets.size() > 0 + && ((gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB) || + gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT)) { + inclGrpSet = true; + } + + return inclGrpSet; + } + + private static OpAttr genReduceGBRS(OpAttr inputOpAf, 
GBInfo gbInfo) throws SemanticException { + Map colExprMap = new HashMap(); + ArrayList outputColumnNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0); + List gb1ColInfoLst = reduceSideGB1.getSchema().getSignature(); + + ArrayList reduceKeys = getReduceKeysForRS(reduceSideGB1, 0, + gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap, true, true); + if (inclGrpSetInReduceSide(gbInfo)) { + addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true, reduceKeys, + outputColumnNames, colInfoLst, colExprMap); + } + + ArrayList reduceValues = getValueKeysForRS(reduceSideGB1, reduceSideGB1.getConf() + .getKeys().size(), outputColumnNames, colInfoLst, colExprMap, true, true); + + ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils + .getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, + getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo), + AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), reduceSideGB1); + + rsOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsOp); + } + + private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException { + Map colExprMap = new HashMap(); + List outputKeyColumnNames = new ArrayList(); + List outputValueColumnNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0); + int distColStartIndx = gbInfo.gbKeys.size() + (gbInfo.grpSets.size() > 0 ? 1 : 0); + + ArrayList reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1, + outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); + int keyLength = reduceKeys.size(); + + if (inclGrpSetInMapSide(gbInfo)) { + addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, + reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap); + keyLength++; + } + if (mapGB.getConf().getKeys().size() > reduceKeys.size()) { + // NOTE: All dist cols have single output col name; + reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys() + .size() - 1, outputKeyColumnNames, true, colInfoLst, colExprMap, false, false)); + } + + ArrayList reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys() + .size(), outputValueColumnNames, colInfoLst, colExprMap, false, false); + List> distinctColIndices = getDistColIndices(gbInfo, distColStartIndx); + + ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils + .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, distinctColIndices, + outputKeyColumnNames, outputValueColumnNames, true, -1, + getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo), + AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB); + + rsOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsOp); + } + + private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException { + Map colExprMap = new HashMap(); + List outputKeyColumnNames = new ArrayList(); + List outputValueColumnNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + int distColStartIndx = gbInfo.gbKeys.size() + (gbInfo.grpSets.size() > 0 ? 1 : 0); + String outputColName; + + // 1. 
Add GB Keys to reduce keys + ArrayList reduceKeys = getReduceKeysForRS(inputOpAf.inputs.get(0), 0, + gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); + int keyLength = reduceKeys.size(); + + // 2. Add Dist UDAF args to reduce keys + if (gbInfo.containsDistinctAggr) { + // TODO: Why is this needed (doesn't represent any cols) + String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size()); + outputKeyColumnNames.add(udafName); + for (int i = 0; i < gbInfo.distExprNodes.size(); i++) { + reduceKeys.add(gbInfo.distExprNodes.get(i)); + outputColName = SemanticAnalyzer.getColumnInternalName(i); + String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "." + + outputColName; + ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, + false); + colInfoLst.add(colInfo); + colExprMap.put(field, gbInfo.distExprNodes.get(i)); + } + } + + // 3. Add UDAF args deduped to reduce values + ArrayList reduceValues = new ArrayList(); + for (int i = 0; i < gbInfo.deDupedNonDistIrefs.size(); i++) { + reduceValues.add(gbInfo.deDupedNonDistIrefs.get(i)); + outputColName = SemanticAnalyzer.getColumnInternalName(reduceValues.size() - 1); + outputValueColumnNames.add(outputColName); + String field = Utilities.ReduceField.VALUE.toString() + "." + outputColName; + colInfoLst.add(new ColumnInfo(field, reduceValues.get(reduceValues.size() - 1).getTypeInfo(), + null, false)); + colExprMap.put(field, reduceValues.get(reduceValues.size() - 1)); + } + + // 4. Gen RS + ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils + .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, + getDistColIndices(gbInfo, distColStartIndx), outputKeyColumnNames, + outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo), + getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema( + colInfoLst), inputOpAf.inputs.get(0)); + + rsOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsOp); + } + + private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + String colOutputName = null; + ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0); + List rsColInfoLst = rs.getSchema().getSignature(); + ColumnInfo ci; + + // 1. 
Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, + gbInfo.gbKeys.size() - 1, false, false); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + ci = rsColInfoLst.get(i); + colOutputName = gbInfo.outputColNames.get(i); + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + // 1.2 Add GrpSet Col + int groupingSetsPosition = -1; + if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) { + groupingSetsPosition = gbKeys.size(); + ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false); + gbKeys.add(grpSetColExpr); + colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1); + ; + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true)); + colExprMap.put(colOutputName, grpSetColExpr); + } + + // 2. Add UDAF + UDAFAttrs udafAttr; + ArrayList aggregations = new ArrayList(); + int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() + : gbInfo.gbKeys.size() * 2; + int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1; + + for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { + udafAttr = gbInfo.udafAttrs.get(i); + ArrayList aggParameters = new ArrayList(); + aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i))); + colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i); + outputColNames.add(colOutputName); + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, + udafAttr.isDistinctUDAF); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, + aggParameters); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode)); + colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); + } + + Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, + outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, + gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), + new RowSchema(colInfoLst), rs); + + rsGBOp2.setColumnExprMap(colExprMap); + + // TODO: Shouldn't we propgate vc? is it vc col from tab or all vc + return new OpAttr("", new HashMap(), rsGBOp2); + } + + private static OpAttr genReduceSideGB1(OpAttr inputOpAf, GBInfo gbInfo, boolean computeGrpSet, + boolean propagateConstInDistinctUDAF, GroupByDesc.Mode gbMode) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + String colOutputName = null; + ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0); + List rsColInfoLst = rs.getSchema().getSignature(); + ColumnInfo ci; + boolean finalGB = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB); + + // 1. 
Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, + gbInfo.gbKeys.size() - 1, false, false); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + ci = rsColInfoLst.get(i); + if (finalGB) { + colOutputName = gbInfo.outputColNames.get(i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(i); + } + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + + // 1.2 Add GrpSet Col + int groupingSetsColPosition = -1; + if ((!finalGB && gbInfo.grpSets.size() > 0) || (finalGB && gbInfo.grpIdFunctionNeeded)) { + groupingSetsColPosition = gbInfo.gbKeys.size(); + if (computeGrpSet) { + // GrpSet Col needs to be constructed + gbKeys.add(new ExprNodeConstantDesc("0")); + } else { + // GrpSet Col already part of input RS + // TODO: Can't we just copy the ExprNodeDEsc from input (Do we need to + // explicitly set table alias to null & VC to false + gbKeys.addAll(ExprNodeDescUtils.genExprNodeDesc(rs, groupingSetsColPosition, + groupingSetsColPosition, false, true)); + } + + colOutputName = SemanticAnalyzer.getColumnInternalName(groupingSetsColPosition); + if (finalGB) { + colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1); + } + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true)); + colExprMap.put(colOutputName, gbKeys.get(groupingSetsColPosition)); + } + + // 2. Walk through UDAF and add them to GB + String lastReduceKeyColName = null; + if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) { + lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames() + .get(rs.getConf().getOutputKeyColumnNames().size() - 1); + } + int numDistinctUDFs = 0; + int distinctStartPosInReduceKeys = gbKeys.size(); + List reduceValues = rs.getConf().getValueCols(); + ArrayList aggregations = new ArrayList(); + int udafColStartPosInOriginalGB = (gbInfo.grpSets.size() > 0) ? gbInfo.gbKeys.size() * 2 + : gbInfo.gbKeys.size(); + int udafColStartPosInRS = rs.getConf().getKeyCols().size(); + for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { + UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i); + ArrayList aggParameters = new ArrayList(); + + if (udafAttr.isDistinctUDAF) { + ColumnInfo rsDistUDAFParamColInfo; + ExprNodeDesc distinctUDAFParam; + ExprNodeDesc constantPropDistinctUDAFParam; + for (int j = 0; j < udafAttr.udafParamsIndxInGBInfoDistExprs.size(); j++) { + rsDistUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j); + String rsDistUDAFParamName = rsDistUDAFParamColInfo.getInternalName(); + // TODO: verify if this is needed + if (lastReduceKeyColName != null) { + rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + + ":" + numDistinctUDFs + "." 
+ SemanticAnalyzer.getColumnInternalName(j); + } + distinctUDAFParam = new ExprNodeColumnDesc(rsDistUDAFParamColInfo.getType(), + rsDistUDAFParamName, rsDistUDAFParamColInfo.getTabAlias(), + rsDistUDAFParamColInfo.getIsVirtualCol()); + if (propagateConstInDistinctUDAF) { + // TODO: Implement propConstDistUDAFParams + constantPropDistinctUDAFParam = SemanticAnalyzer + .isConstantParameterInAggregationParameters( + rsDistUDAFParamColInfo.getInternalName(), reduceValues); + if (constantPropDistinctUDAFParam != null) { + distinctUDAFParam = constantPropDistinctUDAFParam; + } + } + aggParameters.add(distinctUDAFParam); + } + numDistinctUDFs++; + } else { + aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafColStartPosInRS + i))); + } + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, + aggParameters); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, + (gbMode != GroupByDesc.Mode.FINAL && udafAttr.isDistinctUDAF), udafMode)); + + if (finalGB) { + colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() + - 1); + } + + colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); + outputColNames.add(colOutputName); + } + + // Nothing special needs to be done for grouping sets if + // this is the final group by operator, and multiple rows corresponding to + // the + // grouping sets have been generated upstream. + // However, if an addition MR job has been created to handle grouping sets, + // additional rows corresponding to grouping sets need to be created here. + //TODO: Clean up/refactor assumptions + boolean includeGrpSetInGBDesc = (gbInfo.grpSets.size() > 0) + && !finalGB + && !(gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT); + Operator rsGBOp = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, + gbKeys, aggregations, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, gbInfo.grpSets, + includeGrpSetInGBDesc, groupingSetsColPosition, + gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs); + + rsGBOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsGBOp); + } + + /** + * RS-GB0 + * + * @param inputOpAf + * @param gbInfo + * @param gbMode + * @return + * @throws SemanticException + */ + private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo, + GroupByDesc.Mode gbMode) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + String colOutputName = null; + ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0); + List rsColInfoLst = rs.getSchema().getSignature(); + ColumnInfo ci; + boolean useOriginalGBNames = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW); + + // 1. 
Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, + gbInfo.gbKeys.size() - 1, true, false); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + ci = rsColInfoLst.get(i); + if (useOriginalGBNames) { + colOutputName = gbInfo.outputColNames.get(i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(i); + } + outputColNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), null, false)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + + // 2. Walk through UDAF and add them to GB + String lastReduceKeyColName = null; + if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) { + lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames() + .get(rs.getConf().getOutputKeyColumnNames().size() - 1); + } + int numDistinctUDFs = 0; + int distinctStartPosInReduceKeys = gbKeys.size(); + List reduceValues = rs.getConf().getValueCols(); + ArrayList aggregations = new ArrayList(); + int udafColStartPosInOriginalGB = gbInfo.gbKeys.size(); + for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { + UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i); + ArrayList aggParameters = new ArrayList(); + + ColumnInfo rsUDAFParamColInfo; + ExprNodeDesc udafParam; + ExprNodeDesc constantPropDistinctUDAFParam; + for (int j = 0; j < udafAttr.udafParams.size(); j++) { + rsUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j); + String rsUDAFParamName = rsUDAFParamColInfo.getInternalName(); + // TODO: verify if this is needed + if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) { + rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j); + } + udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName, + rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol()); + constantPropDistinctUDAFParam = SemanticAnalyzer + .isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), + reduceValues); + if (constantPropDistinctUDAFParam != null) { + udafParam = constantPropDistinctUDAFParam; + } + aggParameters.add(udafParam); + } + + if (udafAttr.isDistinctUDAF) { + numDistinctUDFs++; + } + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, + aggParameters); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode)); + if (useOriginalGBNames) { + colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i); + } else { + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() + - 1); + } + + colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); + outputColNames.add(colOutputName); + } + + Operator rsGB1 = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, + gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, + false, -1, numDistinctUDFs > 0), new RowSchema(colInfoLst), rs); + rsGB1.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), rsGB1); + } + + @SuppressWarnings("unchecked") + private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException { + ArrayList outputColNames = new ArrayList(); + ArrayList 
colInfoLst = new ArrayList(); + Map colExprMap = new HashMap(); + Set gbKeyColsAsNamesFrmIn = new HashSet(); + String colOutputName = null; + + // 1. Build GB Keys, grouping set starting position + // 1.1 First Add original GB Keys + ArrayList gbKeys = new ArrayList(); + for (int i = 0; i < gbAttrs.gbKeys.size(); i++) { + gbKeys.add(gbAttrs.gbKeys.get(i)); + colOutputName = SemanticAnalyzer.getColumnInternalName(i); + colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false)); + outputColNames.add(colOutputName); + gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i)); + colExprMap.put(colOutputName, gbKeys.get(i)); + } + // 1.2. Adjust GroupingSet Position, GBKeys for GroupingSet Position if + // needed. NOTE: GroupingID is added to map side GB only if we don't GrpSet + // doesn't require additional MR Jobs + int groupingSetsPosition = -1; + boolean inclGrpID = inclGrpSetInMapSide(gbAttrs); + if (inclGrpID) { + groupingSetsPosition = gbKeys.size(); + addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap); + } + // 1.3. Add all distinct params + // NOTE: distinct expr can not be part of of GB key (we assume plan + // gen would have prevented it) + for (int i = 0; i < gbAttrs.distExprNodes.size(); i++) { + if (!gbKeyColsAsNamesFrmIn.contains(gbAttrs.distExprNames.get(i))) { + gbKeys.add(gbAttrs.distExprNodes.get(i)); + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() - 1); + colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.distExprTypes.get(i), "", false)); + outputColNames.add(colOutputName); + gbKeyColsAsNamesFrmIn.add(gbAttrs.distExprNames.get(i)); + colExprMap.put(colOutputName, gbKeys.get(gbKeys.size() - 1)); + } + } + + // 2. Build Aggregations + ArrayList aggregations = new ArrayList(); + for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) { + Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, + udafAttr.isDistinctUDAF); + aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator, + udafAttr.udafParams, udafAttr.isDistinctUDAF, amode)); + GenericUDAFInfo udafInfo; + try { + udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, amode, + udafAttr.udafParams); + } catch (SemanticException e) { + throw new RuntimeException(e); + } + colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() + - 1); + colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false)); + outputColNames.add(colOutputName); + } + + // 3. Create GB + @SuppressWarnings("rawtypes") + Operator gbOp = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, + outputColNames, gbKeys, aggregations, false, gbAttrs.groupByMemoryUsage, + gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, + gbAttrs.containsDistinctAggr), new RowSchema(colInfoLst), inputOpAf.inputs.get(0)); + + // 5. 
Setup Expr Col Map + // NOTE: UDAF is not included in ExprColMap + gbOp.setColumnExprMap(colExprMap); + + return new OpAttr("", new HashMap(), gbOp); + } + + private static void addGrpSetCol(boolean createConstantExpr, String grpSetIDExprName, + boolean addReducePrefixToColInfoName, List exprLst, + List outputColumnNames, List colInfoLst, + Map colExprMap) throws SemanticException { + String outputColName = null; + ExprNodeDesc grpSetColExpr = null; + + if (createConstantExpr) { + grpSetColExpr = new ExprNodeConstantDesc("0"); + } else { + grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, grpSetIDExprName, + null, false); + } + exprLst.add(grpSetColExpr); + + outputColName = SemanticAnalyzer.getColumnInternalName(exprLst.size() - 1); + outputColumnNames.add(outputColName); + String internalColName = outputColName; + if (addReducePrefixToColInfoName) { + internalColName = Utilities.ReduceField.KEY.toString() + "." + outputColName; + } + colInfoLst.add(new ColumnInfo(internalColName, grpSetColExpr.getTypeInfo(), null, true)); + colExprMap.put(internalColName, grpSetColExpr); + } + + /** + * Get Reduce Keys for RS following MapSide GB + * + * @param reduceKeys + * assumed to be deduped list of exprs + * @param outputKeyColumnNames + * @param colExprMap + * @return List of ExprNodeDesc of ReduceKeys + * @throws SemanticException + */ + private static ArrayList getReduceKeysForRS(Operator inOp, int startPos, + int endPos, List outputKeyColumnNames, boolean addOnlyOneKeyColName, + ArrayList colInfoLst, Map colExprMap, + boolean addEmptyTabAlias, boolean setColToNonVirtual) throws SemanticException { + ArrayList reduceKeys = null; + if (endPos < 0) { + reduceKeys = new ArrayList(); + } else { + reduceKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, startPos, endPos, addEmptyTabAlias, + setColToNonVirtual); + int outColNameIndx = startPos; + for (int i = 0; i < reduceKeys.size(); ++i) { + String outputColName = SemanticAnalyzer.getColumnInternalName(outColNameIndx); + outColNameIndx++; + if (!addOnlyOneKeyColName || i == 0) { + outputKeyColumnNames.add(outputColName); + } + + // TODO: Verify if this is needed (Why can't it be always null/empty + String tabAlias = addEmptyTabAlias ? "" : null; + ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + + outputColName, reduceKeys.get(i).getTypeInfo(), tabAlias, false); + colInfoLst.add(colInfo); + colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i)); + } + } + + return reduceKeys; + } + + /** + * Get Value Keys for RS following MapSide GB + * + * @param GroupByOperator + * MapSide GB + * @param outputKeyColumnNames + * @param colExprMap + * @return List of ExprNodeDesc of Values + * @throws SemanticException + */ + private static ArrayList getValueKeysForRS(Operator inOp, int aggStartPos, + List outputKeyColumnNames, ArrayList colInfoLst, + Map colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual) + throws SemanticException { + List mapGBColInfoLst = inOp.getSchema().getSignature(); + ArrayList valueKeys = null; + if (aggStartPos >= mapGBColInfoLst.size()) { + valueKeys = new ArrayList(); + } else { + valueKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, aggStartPos, mapGBColInfoLst.size() - 1, + true, setColToNonVirtual); + for (int i = 0; i < valueKeys.size(); ++i) { + String outputColName = SemanticAnalyzer.getColumnInternalName(i); + outputKeyColumnNames.add(outputColName); + // TODO: Verify if this is needed (Why can't it be always null/empty + String tabAlias = addEmptyTabAlias ? 
"" : null; + ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + + outputColName, valueKeys.get(i).getTypeInfo(), tabAlias, false); + colInfoLst.add(colInfo); + colExprMap.put(colInfo.getInternalName(), valueKeys.get(i)); + } + } + + return valueKeys; + } + + private static List> getDistColIndices(GBInfo gbAttrs, int distOffSet) + throws SemanticException { + List> distColIndices = new ArrayList>(); + + for (List udafDistCols : gbAttrs.distColIndices) { + List udfAdjustedDistColIndx = new ArrayList(); + for (Integer distIndx : udafDistCols) { + udfAdjustedDistColIndx.add(distIndx + distOffSet); + } + distColIndices.add(udfAdjustedDistColIndx); + } + + return distColIndices; + } + + // TODO: Implement this + private static ExprNodeDesc propConstDistUDAFParams() { + return null; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java (working copy) @@ -24,58 +24,93 @@ import java.util.LinkedList; import java.util.List; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.rex.RexWindow; +import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeUtil; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec; +import 
org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* * convert a RexNode to an ExprNodeDesc */ public class ExprNodeConverter extends RexVisitorImpl { - RelDataType rType; String tabAlias; + String columnAlias; + RelDataType inputRowType; + RelDataType outputRowType; boolean partitioningExpr; + WindowFunctionSpec wfs; private final RelDataTypeFactory dTFactory; + protected final Log LOG = LogFactory.getLog(this.getClass().getName()); - public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr, RelDataTypeFactory dTFactory) { + public ExprNodeConverter(String tabAlias, RelDataType inputRowType, + boolean partitioningExpr, RelDataTypeFactory dTFactory) { + this(tabAlias, null, inputRowType, null, partitioningExpr, dTFactory); + } + + public ExprNodeConverter(String tabAlias, String columnAlias, RelDataType inputRowType, + RelDataType outputRowType, boolean partitioningExpr, RelDataTypeFactory dTFactory) { super(true); - /* - * hb: 6/25/14 for now we only support expressions that only contain - * partition cols. there is no use case for supporting generic expressions. - * for supporting generic exprs., we need to give the converter information - * on whether a column is a partition column or not, whether a column is a - * virtual column or not. 
- */ - assert partitioningExpr == true; this.tabAlias = tabAlias; - this.rType = rType; + this.columnAlias = columnAlias; + this.inputRowType = inputRowType; + this.outputRowType = outputRowType; this.partitioningExpr = partitioningExpr; this.dTFactory = dTFactory; } + public WindowFunctionSpec getWindowFunctionSpec() { + return this.wfs; + } + @Override public ExprNodeDesc visitInputRef(RexInputRef inputRef) { - RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); + RelDataTypeField f = inputRowType.getFieldList().get(inputRef.getIndex()); return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), f.getName(), tabAlias, partitioningExpr); } + /** + * TODO: Handle 1) cast 2) Field Access 3) Windowing Over() 4, Windowing Agg Call + */ @Override public ExprNodeDesc visitCall(RexCall call) { ExprNodeGenericFuncDesc gfDesc = null; @@ -99,30 +134,46 @@ } else if (ASTConverter.isFlat(call)) { // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the // exprnode + GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2); ArrayList tmpExprArgs = new ArrayList(); tmpExprArgs.addAll(args.subList(0, 2)); - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), - SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); + try { + gfDesc = ExprNodeGenericFuncDesc.newInstance(hiveUdf, tmpExprArgs); + } catch (UDFArgumentException e) { + LOG.error(e); + throw new RuntimeException(e); + } for (int i = 2; i < call.operands.size(); i++) { tmpExprArgs = new ArrayList(); tmpExprArgs.add(gfDesc); tmpExprArgs.add(args.get(i)); - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), - SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); + try { + gfDesc = ExprNodeGenericFuncDesc.newInstance(hiveUdf, tmpExprArgs); + } catch (UDFArgumentException e) { + LOG.error(e); + throw new RuntimeException(e); } + } } else { - GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF( - call.getOperator(), call.getType(), args.size()); + GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), + args.size()); if (hiveUdf == null) { - throw new RuntimeException("Cannot find UDF for " + call.getType() + " " + call.getOperator() - + "[" + call.getOperator().getKind() + "]/" + args.size()); + throw new RuntimeException("Cannot find UDF for " + call.getType() + " " + + call.getOperator() + "[" + call.getOperator().getKind() + "]/" + args.size()); } - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), hiveUdf, args); + try { + gfDesc = ExprNodeGenericFuncDesc.newInstance(hiveUdf, args); + } catch (UDFArgumentException e) { + LOG.error(e); + throw new RuntimeException(e); } - + } return gfDesc; } + /** + * TODO: 1. 
Handle NULL + */ @Override public ExprNodeDesc visitLiteral(RexLiteral literal) { RelDataType lType = literal.getType(); @@ -176,4 +227,138 @@ } } + @Override + public ExprNodeDesc visitOver(RexOver over) { + if (!deep) { + return null; } + + final RexWindow window = over.getWindow(); + + final WindowSpec windowSpec = new WindowSpec(); + final PartitioningSpec partitioningSpec = getPSpec(window); + windowSpec.setPartitioning(partitioningSpec); + final WindowFrameSpec windowFrameSpec = getWindowRange(window); + windowSpec.setWindowFrame(windowFrameSpec); + + wfs = new WindowFunctionSpec(); + wfs.setWindowSpec(windowSpec); + final Schema schema = new Schema(tabAlias, inputRowType.getFieldList()); + final ASTNode wUDAFAst = new ASTConverter.RexVisitor(schema).visitOver(over); + wfs.setExpression(wUDAFAst); + ASTNode nameNode = (ASTNode) wUDAFAst.getChild(0); + wfs.setName(nameNode.getText()); + for(int i=1; i < wUDAFAst.getChildCount()-1; i++) { + ASTNode child = (ASTNode) wUDAFAst.getChild(i); + wfs.addArg(child); + } + wfs.setAlias(columnAlias); + + RelDataTypeField f = outputRowType.getField(columnAlias, false, false); + return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), columnAlias, tabAlias, + partitioningExpr); + } + + private PartitioningSpec getPSpec(RexWindow window) { + PartitioningSpec partitioning = new PartitioningSpec(); + + if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) { + PartitionSpec pSpec = new PartitionSpec(); + for (RexNode pk : window.partitionKeys) { + PartitionExpression exprSpec = new PartitionExpression(); + RexInputRef inputRef = (RexInputRef) pk; + RelDataTypeField f = inputRowType.getFieldList().get(inputRef.getIndex()); + ASTNode astCol; + if (tabAlias == null || tabAlias.isEmpty()) { + astCol = ASTBuilder.unqualifiedName(f.getName()); + } else { + astCol = ASTBuilder.qualifiedName(tabAlias, f.getName()); + } + exprSpec.setExpression(astCol); + pSpec.addExpression(exprSpec); + } + partitioning.setPartSpec(pSpec); + } + + if (window.orderKeys != null && !window.orderKeys.isEmpty()) { + OrderSpec oSpec = new OrderSpec(); + for (RexFieldCollation ok : window.orderKeys) { + OrderExpression exprSpec = new OrderExpression(); + Order order = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? 
+ Order.ASC : Order.DESC; + exprSpec.setOrder(order); + RexInputRef inputRef = (RexInputRef) ok.left; + RelDataTypeField f = inputRowType.getFieldList().get(inputRef.getIndex()); + ASTNode astCol; + if (tabAlias == null || tabAlias.isEmpty()) { + astCol = ASTBuilder.unqualifiedName(f.getName()); + } else { + astCol = ASTBuilder.qualifiedName(tabAlias, f.getName()); + } + exprSpec.setExpression(astCol); + oSpec.addExpression(exprSpec); + } + partitioning.setOrderSpec(oSpec); + } + + return partitioning; + } + + private WindowFrameSpec getWindowRange(RexWindow window) { + // NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical) + + WindowFrameSpec windowFrame = new WindowFrameSpec(); + + BoundarySpec start = null; + RexWindowBound ub = window.getUpperBound(); + if (ub != null) { + start = getWindowBound(ub, window.isRows()); + } + + BoundarySpec end = null; + RexWindowBound lb = window.getLowerBound(); + if (lb != null) { + end = getWindowBound(lb, window.isRows()); + } + + if (start != null || end != null) { + if (start != null) { + windowFrame.setStart(start); + } + if (end != null) { + windowFrame.setEnd(end); + } + } + + return windowFrame; + } + + private BoundarySpec getWindowBound(RexWindowBound wb, boolean isRows) { + BoundarySpec boundarySpec; + + if (wb.isCurrentRow()) { + boundarySpec = new CurrentRowSpec(); + } else { + final Direction direction; + final int amt; + if (wb.isPreceding()) { + direction = Direction.PRECEDING; + } else { + direction = Direction.FOLLOWING; + } + if (wb.isUnbounded()) { + amt = BoundarySpec.UNBOUNDED_AMOUNT; + } else { + amt = RexLiteral.intValue(wb.getOffset()); + } + if (isRows) { + boundarySpec = new RangeBoundarySpec(direction, amt); + } else { + boundarySpec = new ValueBoundarySpec(direction, amt); + } + } + + return boundarySpec; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -68,7 +69,7 @@ // However in HIVE DB name can not appear in select list; in case of join // where table names differ only in DB name, Hive would require user // introducing explicit aliases for tbl. 
- b.add(HiveParser.Identifier, hTbl.getTableAlias()); + b.add(HiveParser.Identifier, ((HiveTableScan)scan).getTableAlias()); return b.node(); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java (working copy) @@ -50,9 +50,11 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import com.google.common.collect.ImmutableList; @@ -95,6 +97,23 @@ return newTopNode; } + private static String getTblAlias(RelNode rel) { + + if (null == rel) { + return null; + } + if (rel instanceof HiveTableScan) { + return ((HiveTableScan)rel).getTableAlias(); + } + if (rel instanceof Project) { + return null; + } + if (rel.getInputs().size() == 1) { + return getTblAlias(rel.getInput(0)); + } + return null; + } + private static void convertOpTree(RelNode rel, RelNode parent) { if (rel instanceof HepRelVertex) { @@ -103,6 +122,12 @@ if (!validJoinParent(rel, parent)) { introduceDerivedTable(rel, parent); } + String leftChild = getTblAlias(((Join)rel).getLeft()); + if (null != leftChild && leftChild.equalsIgnoreCase(getTblAlias(((Join)rel).getRight()))) { + // introduce derived table above one child, if this is self-join + // since user provided aliases are lost at this point. 
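+ // e.g. (illustrative comment, assumed behavior): for "select * from src a join src b on (a.key = b.key)"
+ // both join inputs resolve to the same alias "src" at this point, so the left input is wrapped in a
+ // derived table to keep the generated AST unambiguous.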
+ introduceDerivedTable(((Join)rel).getLeft(), rel); + } } else if (rel instanceof MultiJoin) { throw new RuntimeException("Found MultiJoin"); } else if (rel instanceof RelSubset) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java (working copy) @@ -28,21 +28,31 @@ import org.apache.calcite.plan.RelOptAbstractTable; import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptUtil.InputFinder; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.ImmutableBitSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -51,15 +61,16 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableMap.Builder; +import com.google.common.collect.Lists; public class RelOptHiveTable extends RelOptAbstractTable { private final Table hiveTblMetadata; - private final String tblAlias; private final ImmutableList hiveNonPartitionCols; + private final ImmutableList hivePartitionCols; private final ImmutableMap hiveNonPartitionColsMap; private final ImmutableMap hivePartitionColsMap; - private final int noOfProjs; + private final ImmutableList hiveVirtualCols; + private final int noOfNonVirtualCols; final HiveConf hiveConf; private double rowCount = -1; @@ -67,37 +78,65 @@ PrunedPartitionList partitionList; Map partitionCache; AtomicInteger noColsMissingStats; + private final String qbID; protected static final Log LOG = LogFactory .getLog(RelOptHiveTable.class .getName()); - public RelOptHiveTable(RelOptSchema calciteSchema, String qualifiedTblName, String tblAlias, RelDataType rowType, - Table hiveTblMetadata, List hiveNonPartitionCols, - List hivePartitionCols, HiveConf hconf, Map partitionCache, AtomicInteger noColsMissingStats) { + public RelOptHiveTable(RelOptSchema calciteSchema, String qualifiedTblName, + RelDataType rowType, Table hiveTblMetadata, List hiveNonPartitionCols, + List 
hivePartitionCols, List hiveVirtualCols, HiveConf hconf, + Map partitionCache, AtomicInteger noColsMissingStats, + String qbID) { super(calciteSchema, qualifiedTblName, rowType); this.hiveTblMetadata = hiveTblMetadata; - this.tblAlias = tblAlias; this.hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols); - this.hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0); - this.hivePartitionColsMap = getColInfoMap(hivePartitionCols, hiveNonPartitionColsMap.size()); - this.noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size(); + this.hiveNonPartitionColsMap = HiveCalciteUtil.getColInfoMap(hiveNonPartitionCols, 0); + this.hivePartitionCols = ImmutableList.copyOf(hivePartitionCols); + this.hivePartitionColsMap = HiveCalciteUtil.getColInfoMap(hivePartitionCols, hiveNonPartitionColsMap.size()); + this.noOfNonVirtualCols = hiveNonPartitionCols.size() + hivePartitionCols.size(); + this.hiveVirtualCols = ImmutableList.copyOf(hiveVirtualCols); this.hiveConf = hconf; this.partitionCache = partitionCache; this.noColsMissingStats = noColsMissingStats; + this.qbID = qbID; } - private static ImmutableMap getColInfoMap(List hiveCols, - int startIndx) { - Builder bldr = ImmutableMap. builder(); + public RelOptHiveTable copy(RelDataType newRowType) { + // 1. Build map of column name to col index of original schema + // Assumption: Hive Table can not contain duplicate column names + Map nameToColIndxMap = new HashMap(); + for (RelDataTypeField f : this.rowType.getFieldList()) { + nameToColIndxMap.put(f.getName(), f.getIndex()); + } - int indx = startIndx; - for (ColumnInfo ci : hiveCols) { - bldr.put(indx, ci); - indx++; + // 2. Build nonPart/Part/Virtual column info for new RowSchema + List newHiveNonPartitionCols = new ArrayList(); + List newHivePartitionCols = new ArrayList(); + List newHiveVirtualCols = new ArrayList(); + Map virtualColInfoMap = HiveCalciteUtil.getVColsMap(this.hiveVirtualCols, + this.noOfNonVirtualCols); + Integer originalColIndx; + ColumnInfo cInfo; + VirtualColumn vc; + for (RelDataTypeField f : newRowType.getFieldList()) { + originalColIndx = nameToColIndxMap.get(f.getName()); + if ((cInfo = hiveNonPartitionColsMap.get(originalColIndx)) != null) { + newHiveNonPartitionCols.add(new ColumnInfo(cInfo)); + } else if ((cInfo = hivePartitionColsMap.get(originalColIndx)) != null) { + newHivePartitionCols.add(new ColumnInfo(cInfo)); + } else if ((vc = virtualColInfoMap.get(originalColIndx)) != null) { + newHiveVirtualCols.add(vc); + } else { + throw new RuntimeException("Copy encountered a column not seen in original TS"); } + } - return bldr.build(); + // 3. 
Build new Table + return new RelOptHiveTable(this.schema, this.name, newRowType, + this.hiveTblMetadata, newHiveNonPartitionCols, newHivePartitionCols, newHiveVirtualCols, + this.hiveConf, this.partitionCache, this.noColsMissingStats, qbID); } @Override @@ -116,16 +155,57 @@ } @Override + public List getCollationList() { + ImmutableList.Builder collationList = new ImmutableList.Builder(); + for (Order sortColumn : this.hiveTblMetadata.getSortCols()) { + for (int i=0; i() + .add(RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(collationList.build()))) + .build(); + } + + @Override + public RelDistribution getDistribution() { + ImmutableList.Builder columnPositions = new ImmutableList.Builder(); + for (String bucketColumn : this.hiveTblMetadata.getBucketCols()) { + for (int i=0; i rowCounts = StatsUtils.getBasicStatForPartitions( - hiveTblMetadata, partitionList.getNotDeniedPartns(), - StatsSetupConst.ROW_COUNT); + List rowCounts = StatsUtils.getBasicStatForPartitions(hiveTblMetadata, + partitionList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT); rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts); } else { @@ -143,19 +223,6 @@ return hiveTblMetadata; } - public String getTableAlias() { - // NOTE: Calcite considers tbls to be equal if their names are the same. Hence - // we need to provide Calcite the fully qualified table name (dbname.tblname) - // and not the user provided aliases. - // However in HIVE DB name can not appear in select list; in case of join - // where table names differ only in DB name, Hive would require user - // introducing explicit aliases for tbl. - if (tblAlias == null) - return hiveTblMetadata.getTableName(); - else - return tblAlias; - } - private String getColNamesForLogging(Set colLst) { StringBuffer sb = new StringBuffer(); boolean firstEntry = true; @@ -173,22 +240,27 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode) { try { - if (!hiveTblMetadata.isPartitioned() || pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) { - // there is no predicate on partitioning column, we need all partitions in this case. - partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(), partitionCache); + if (!hiveTblMetadata.isPartitioned() || pruneNode == null + || InputFinder.bits(pruneNode).length() == 0) { + // there is no predicate on partitioning column, we need all partitions + // in this case. 
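// Illustrative reading of this guard: every partition is fetched when (a) the table is not
// partitioned, (b) no pruning predicate was pushed down, or (c) the predicate references no
// input columns, i.e. InputFinder.bits(pruneNode) is empty, as for a constant condition such as
// 11 = 11. Only when a real partition-column predicate exists does the code below convert it to
// an ExprNodeDesc and hand it to PartitionPruner.prune.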
+ partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(), + partitionCache); return; } // We have valid pruning expressions, only retrieve qualifying partitions - ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true, getRelOptSchema().getTypeFactory())); + ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), + true, this.getRelOptSchema().getTypeFactory())); - partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), partitionCache); + partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), + partitionCache); } catch (HiveException he) { throw new RuntimeException(he); } } - private void updateColStats(Set projIndxLst) { + private void updateColStats(Set projIndxLst, boolean allowNullColumnForMissingStats) { List nonPartColNamesThatRqrStats = new ArrayList(); List nonPartColIndxsThatRqrStats = new ArrayList(); List partColNamesThatRqrStats = new ArrayList(); @@ -289,10 +361,10 @@ if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) { ColStatistics cStats = null; for (int i = 0; i < partColNamesThatRqrStats.size(); i++) { - cStats = new ColStatistics(hiveTblMetadata.getTableName(), - partColNamesThatRqrStats.get(i), hivePartitionColsMap.get( - partColIndxsThatRqrStats.get(i)).getTypeName()); - cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(),partColNamesThatRqrStats.get(i))); + cStats = new ColStatistics(hiveTblMetadata.getTableName(), partColNamesThatRqrStats.get(i), + hivePartitionColsMap.get(partColIndxsThatRqrStats.get(i)).getTypeName()); + cStats.setCountDistint(getDistinctCount(partitionList.getPartitions(), + partColNamesThatRqrStats.get(i))); hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats); } } @@ -301,11 +373,15 @@ if (!colNamesFailedStats.isEmpty()) { String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: " + getColNamesForLogging(colNamesFailedStats); + noColsMissingStats.getAndAdd(colNamesFailedStats.size()); + if (allowNullColumnForMissingStats) { + LOG.warn(logMsg); + } else { LOG.error(logMsg); - noColsMissingStats.getAndAdd(colNamesFailedStats.size()); throw new RuntimeException(logMsg); } } + } private int getDistinctCount(Set partitions, String partColName) { Set distinctVals = new HashSet(partitions.size()); @@ -316,32 +392,34 @@ } public List getColStat(List projIndxLst) { - ImmutableList.Builder colStatsBldr = ImmutableList. builder(); + return getColStat(projIndxLst, false); + } + public List getColStat(List projIndxLst, boolean allowNullColumnForMissingStats) { + List colStatsBldr = Lists.newArrayList(); + if (projIndxLst != null) { - updateColStats(new HashSet(projIndxLst)); + updateColStats(new HashSet(projIndxLst), allowNullColumnForMissingStats); for (Integer i : projIndxLst) { colStatsBldr.add(hiveColStatsMap.get(i)); } } else { List pILst = new ArrayList(); - for (Integer i = 0; i < noOfProjs; i++) { + for (Integer i = 0; i < noOfNonVirtualCols; i++) { pILst.add(i); } - updateColStats(new HashSet(pILst)); + updateColStats(new HashSet(pILst), allowNullColumnForMissingStats); for (Integer pi : pILst) { colStatsBldr.add(hiveColStatsMap.get(pi)); } } - return colStatsBldr.build(); + return colStatsBldr; } /* - * use to check if a set of columns are all partition columns. - * true only if: - * - all columns in BitSet are partition - * columns. + * use to check if a set of columns are all partition columns. 
true only if: - + * all columns in BitSet are partition columns. */ public boolean containsPartitionColumnsOnly(ImmutableBitSet cols) { @@ -352,4 +430,32 @@ } return true; } + + public List getVirtualCols() { + return this.hiveVirtualCols; } + + public List getPartColumns() { + return this.hivePartitionCols; + } + + public List getNonPartColumns() { + return this.hiveNonPartitionCols; + } + + public String getQBID() { + return qbID; + } + + public int getNoOfNonVirtualCols() { + return noOfNonVirtualCols; + } + + public Map getPartColInfoMap() { + return hivePartitionColsMap; + } + + public Map getNonPartColInfoMap() { + return hiveNonPartitionColsMap; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java (revision 0) @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Exchange; +import org.apache.calcite.rel.core.Join; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; + +import com.google.common.collect.ImmutableList; + +/** Not an optimization rule. + * Rule to aid in translation from Calcite tree -> Hive tree. 
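 * Illustrative effect (assumed equi-join): for src JOIN srcpart on a single key column, a
 * HiveSortExchange that is hash-distributed and sorted on that key is placed under each join
 * input, roughly mirroring the ReduceSink operators the Hive operator tree will need.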
+ * Transforms : + * Left Right Left Right + * \ / -> \ / + * Join HashExchange HashExchange + * \ / + * Join + */ +public class HiveInsertExchange4JoinRule extends RelOptRule { + + protected static transient final Log LOG = LogFactory + .getLog(HiveInsertExchange4JoinRule.class); + + public HiveInsertExchange4JoinRule() { + // match join with exactly 2 inputs + super(RelOptRule.operand(Join.class, + operand(RelNode.class, any()), + operand(RelNode.class, any()))); + } + + @Override + public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + + if (call.rel(1) instanceof Exchange && + call.rel(2) instanceof Exchange) { + return; + } + + JoinPredicateInfo joinPredInfo = + HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + + // get key columns from inputs. Those are the columns on which we will distribute on. + // It is also the columns we will sort on. + List joinLeftKeyPositions = new ArrayList(); + List joinRightKeyPositions = new ArrayList(); + ImmutableList.Builder leftCollationListBuilder = + new ImmutableList.Builder(); + ImmutableList.Builder rightCollationListBuilder = + new ImmutableList.Builder(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. + getEquiJoinPredicateElements().get(i); + joinLeftKeyPositions.addAll(joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()); + for (int leftPos : joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()) { + leftCollationListBuilder.add(new RelFieldCollation(leftPos)); + } + joinRightKeyPositions.addAll(joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()); + for (int rightPos : joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()) { + rightCollationListBuilder.add(new RelFieldCollation(rightPos)); + } + } + + HiveSortExchange left = HiveSortExchange.create(join.getLeft(), + new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinLeftKeyPositions), + new HiveRelCollation(leftCollationListBuilder.build())); + HiveSortExchange right = HiveSortExchange.create(join.getRight(), + new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinRightKeyPositions), + new HiveRelCollation(rightCollationListBuilder.build())); + + Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), + left, right, join.getJoinType(), join.isSemiJoinDone()); + + call.getPlanner().onCopy(join, newJoin); + + call.transformTo(newJoin); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java (revision 0) @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +import com.google.common.collect.ImmutableList; + +public final class HiveJoinAddNotNullRule extends RelOptRule { + + private static final String NOT_NULL_FUNC_NAME = "isnotnull"; + + /** The singleton. */ + public static final HiveJoinAddNotNullRule INSTANCE = + new HiveJoinAddNotNullRule(HiveFilter.DEFAULT_FILTER_FACTORY); + + private final FilterFactory filterFactory; + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates an HiveJoinAddNotNullRule. + */ + public HiveJoinAddNotNullRule(FilterFactory filterFactory) { + super(operand(Join.class, + operand(RelNode.class, any()), + operand(RelNode.class, any()))); + this.filterFactory = filterFactory; + } + + //~ Methods ---------------------------------------------------------------- + + public void onMatch(RelOptRuleCall call) { + final Join join = call.rel(0); + RelNode leftInput = call.rel(1); + RelNode rightInput = call.rel(2); + + if (join.getJoinType() != JoinRelType.INNER) { + return; + } + + if (join.getCondition().isAlwaysTrue()) { + return; + } + + JoinPredicateInfo joinPredInfo = + HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + + Set joinLeftKeyPositions = new HashSet(); + Set joinRightKeyPositions = new HashSet(); + for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) { + JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo. 
+ getEquiJoinPredicateElements().get(i); + joinLeftKeyPositions.addAll(joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()); + joinRightKeyPositions.addAll(joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()); + } + + // Build not null conditions + final RelOptCluster cluster = join.getCluster(); + final RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + + final Map newLeftConditions = getNotNullConditions(cluster, + rexBuilder, leftInput, joinLeftKeyPositions); + final Map newRightConditions = getNotNullConditions(cluster, + rexBuilder, rightInput, joinRightKeyPositions); + + // Nothing will be added to the expression + if (newLeftConditions == null && newRightConditions == null) { + return; + } + + if (newLeftConditions != null) { + if (leftInput instanceof HiveFilter) { + leftInput = leftInput.getInput(0); + } + leftInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, + leftInput, newLeftConditions.values()); + } + if (newRightConditions != null) { + if (rightInput instanceof HiveFilter) { + rightInput = rightInput.getInput(0); + } + rightInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, + rightInput, newRightConditions.values()); + } + + Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), + leftInput, rightInput, join.getJoinType(), join.isSemiJoinDone()); + + call.getPlanner().onCopy(join, newJoin); + + call.transformTo(newJoin); + } + + private static Map getNotNullConditions(RelOptCluster cluster, + RexBuilder rexBuilder, RelNode input, Set inputKeyPositions) { + + boolean added = false; + + final RelDataType returnType = cluster.getTypeFactory(). + createSqlType(SqlTypeName.BOOLEAN); + + final Map newConditions; + if (input instanceof HiveFilter) { + newConditions = splitCondition(((HiveFilter) input).getCondition()); + } + else { + newConditions = new HashMap(); + } + for (int pos : inputKeyPositions) { + try { + RelDataType keyType = input.getRowType().getFieldList().get(pos).getType(); + // Nothing to do if key cannot be null + if (!keyType.isNullable()) { + continue; + } + SqlOperator funcCall = SqlFunctionConverter.getCalciteOperator(NOT_NULL_FUNC_NAME, + FunctionRegistry.getFunctionInfo(NOT_NULL_FUNC_NAME).getGenericUDF(), + ImmutableList.of(keyType), returnType); + RexNode cond = rexBuilder.makeCall(funcCall, rexBuilder.makeInputRef(input, pos)); + String digest = cond.toString(); + if (!newConditions.containsKey(digest)) { + newConditions.put(digest,cond); + added = true; + } + } catch (SemanticException e) { + throw new AssertionError(e.getMessage()); + } + } + // Nothing will be added to the expression + if (!added) { + return null; + } + return newConditions; + } + + private static Map splitCondition(RexNode condition) { + Map newConditions = new HashMap(); + if (condition.getKind() == SqlKind.AND) { + for (RexNode node : ((RexCall) condition).getOperands()) { + newConditions.put(node.toString(), node); + } + } + else { + newConditions.put(condition.toString(), condition); + } + return newConditions; + } + + private static RelNode createHiveFilterConjunctiveCondition(FilterFactory filterFactory, + RexBuilder rexBuilder, RelNode input, Collection conditions) { + final RexNode newCondition = RexUtil.composeConjunction(rexBuilder, conditions, false); + return filterFactory.createFilter(input, newCondition); + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java (revision 0) @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdDistribution; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdDistribution { + + public static final RelMetadataProvider SOURCE = + ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()), + RelMdDistribution.SOURCE)); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdDistribution() {} + + //~ Methods ---------------------------------------------------------------- + + public RelDistribution distribution(HiveAggregate aggregate) { + return new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, + aggregate.getGroupSet().asList()); + } + + public RelDistribution distribution(HiveJoin join) { + return join.getDistribution(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java (working copy) @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import java.util.ArrayList; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java (working copy) @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import java.util.BitSet; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java (revision 0) @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdCollation; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdCollation { + + public static final RelMetadataProvider SOURCE = + ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLLATIONS.method, new HiveRelMdCollation()), + RelMdCollation.SOURCE)); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdCollation() {} + + //~ Methods ---------------------------------------------------------------- + + public ImmutableList collations(HiveAggregate aggregate) { + // Compute collations + ImmutableList.Builder collationListBuilder = + new ImmutableList.Builder(); + for (int pos : aggregate.getGroupSet().asList()) { + final RelFieldCollation fieldCollation = new RelFieldCollation(pos); + collationListBuilder.add(fieldCollation); + } + // Return aggregate collations + return ImmutableList.of( + RelCollationTraitDef.INSTANCE.canonize( + new HiveRelCollation(collationListBuilder.build()))); + } + + public ImmutableList collations(HiveJoin join) { + return join.getCollation(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java (revision 0) @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdParallelism; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +public class HiveRelMdParallelism extends RelMdParallelism { + + private final Double maxSplitSize; + + //~ Constructors ----------------------------------------------------------- + + public HiveRelMdParallelism(Double maxSplitSize) { + this.maxSplitSize = maxSplitSize; + } + + public RelMetadataProvider getMetadataProvider() { + return ReflectiveRelMetadataProvider.reflectiveSource(this, + BuiltInMethod.IS_PHASE_TRANSITION.method, + BuiltInMethod.SPLIT_COUNT.method); + } + + //~ Methods ---------------------------------------------------------------- + + public Boolean isPhaseTransition(HiveJoin join) { + return join.isPhaseTransition(); + } + + public Boolean isPhaseTransition(HiveSort sort) { + // As Exchange operator is introduced later on, we make a + // sort operator create a new stage for the moment + return true; + } + + public Integer splitCount(HiveJoin join) { + return join.getSplitCount(); + } + + public Integer splitCount(HiveTableScan scan) { + RelOptHiveTable table = (RelOptHiveTable) scan.getTable(); + return table.getHiveTableMD().getNumBuckets(); + } + + public Integer splitCount(RelNode rel) { + Boolean newPhase = RelMetadataQuery.isPhaseTransition(rel); + + if (newPhase == null) { + return null; + } + + if (newPhase) { + // We repartition: new number of splits + return splitCountRepartition(rel); + } + + // We do not repartition: take number of splits from children + Integer splitCount = 0; + for (RelNode input : rel.getInputs()) { + splitCount += RelMetadataQuery.splitCount(input); + } + return splitCount; + } + + public Integer splitCountRepartition(RelNode rel) { + // We repartition: new number of splits + final Double averageRowSize = RelMetadataQuery.getAverageRowSize(rel); + final Double rowCount = RelMetadataQuery.getRowCount(rel); + if (averageRowSize == null || rowCount == null) { + return null; + } + final Double totalSize = averageRowSize * rowCount; + final Double splitCount = totalSize / maxSplitSize; + return splitCount.intValue(); + } + +} + +// End RelMdParallelism.java \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java (revision 0) @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdMemory; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; + +public class HiveRelMdMemory extends RelMdMemory { + + private static final HiveRelMdMemory INSTANCE = new HiveRelMdMemory(); + + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource(INSTANCE, + BuiltInMethod.MEMORY.method, + BuiltInMethod.CUMULATIVE_MEMORY_WITHIN_PHASE.method, + BuiltInMethod.CUMULATIVE_MEMORY_WITHIN_PHASE_SPLIT.method); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdMemory() {} + + //~ Methods ---------------------------------------------------------------- + + public Double memory(HiveTableScan tableScan) { + return 0.0d; + } + + public Double memory(HiveAggregate aggregate) { + final Double avgRowSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); + final Double rowCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + if (avgRowSize == null || rowCount == null) { + return null; + } + return avgRowSize * rowCount; + } + + public Double memory(HiveFilter filter) { + return 0.0; + } + + public Double memory(HiveJoin join) { + return join.getMemory(); + } + + public Double cumulativeMemoryWithinPhaseSplit(HiveJoin join) { + return join.getCumulativeMemoryWithinPhaseSplit(); + } + + public Double memory(HiveLimit limit) { + return 0.0; + } + + public Double memory(HiveProject project) { + return 0.0; + } + + public Double memory(HiveSort sort) { + if (sort.getCollation() != RelCollations.EMPTY) { + // It sorts + final Double avgRowSize = RelMetadataQuery.getAverageRowSize(sort.getInput()); + final Double rowCount = RelMetadataQuery.getRowCount(sort.getInput()); + if (avgRowSize == null || rowCount == null) { + return null; + } + return avgRowSize * rowCount; + } + // It does not sort, memory footprint is zero + return 0.0; + } + + public Double memory(HiveUnion union) { + return 0.0; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java (revision 
0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java (revision 0) @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.List; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdSize; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableNullableList; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdSize extends RelMdSize { + + private static final HiveRelMdSize INSTANCE = new HiveRelMdSize(); + + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource(INSTANCE, + BuiltInMethod.AVERAGE_COLUMN_SIZES.method, + BuiltInMethod.AVERAGE_ROW_SIZE.method); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdSize() {} + + //~ Methods ---------------------------------------------------------------- + + public List averageColumnSizes(HiveTableScan scan) { + List neededcolsLst = scan.getNeededColIndxsFrmReloptHT(); + List columnStatistics = ((RelOptHiveTable) scan.getTable()) + .getColStat(neededcolsLst, true); + + // Obtain list of col stats, or use default if they are not available + final ImmutableList.Builder list = ImmutableList.builder(); + int indxRqdCol = 0; + int nFields = scan.getRowType().getFieldCount(); + for (int i = 0; i < nFields; i++) { + if (neededcolsLst.contains(i)) { + ColStatistics columnStatistic = columnStatistics.get(indxRqdCol); + indxRqdCol++; + if (columnStatistic == null) { + RelDataTypeField field = scan.getRowType().getFieldList().get(i); + list.add(averageTypeValueSize(field.getType())); + } else { + list.add(columnStatistic.getAvgColLen()); + } + } else { + list.add(new Double(0)); + } + } + + return list.build(); + } + + public List averageColumnSizes(HiveJoin rel) { + final RelNode left = rel.getLeft(); + final RelNode right = rel.getRight(); + final List lefts = + RelMetadataQuery.getAverageColumnSizes(left); + List rights = null; + if (!rel.isLeftSemiJoin()) { + rights = RelMetadataQuery.getAverageColumnSizes(right); + } + if (lefts == null && 
rights == null) { + return null; + } + final int fieldCount = rel.getRowType().getFieldCount(); + Double[] sizes = new Double[fieldCount]; + if (lefts != null) { + lefts.toArray(sizes); + } + if (rights != null) { + final int leftCount = left.getRowType().getFieldCount(); + for (int i = 0; i < rights.size(); i++) { + sizes[leftCount + i] = rights.get(i); + } + } + return ImmutableNullableList.copyOf(sizes); + } + + // TODO: remove when averageTypeValueSize method RelMdSize + // supports all types + public Double averageTypeValueSize(RelDataType type) { + switch (type.getSqlTypeName()) { + case BOOLEAN: + case TINYINT: + return 1d; + case SMALLINT: + return 2d; + case INTEGER: + case FLOAT: + case REAL: + case DECIMAL: + case DATE: + case TIME: + return 4d; + case BIGINT: + case DOUBLE: + case TIMESTAMP: + case INTERVAL_DAY_TIME: + case INTERVAL_YEAR_MONTH: + return 8d; + case BINARY: + return (double) type.getPrecision(); + case VARBINARY: + return Math.min((double) type.getPrecision(), 100d); + case CHAR: + return (double) type.getPrecision() * BYTES_PER_CHARACTER; + case VARCHAR: + // Even in large (say VARCHAR(2000)) columns most strings are small + return Math.min((double) type.getPrecision() * BYTES_PER_CHARACTER, 100d); + case ROW: + Double average = 0.0; + for (RelDataTypeField field : type.getFieldList()) { + average += averageTypeValueSize(field.getType()); + } + return average; + default: + return null; + } + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java (working copy) @@ -28,8 +28,10 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; @@ -50,13 +52,18 @@ import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.apache.calcite.util.Util; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableMap.Builder; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; @@ -319,11 +326,11 @@ return this.mapOfProjIndxInJoinSchemaToLeafPInfo; } - public static JoinPredicateInfo constructJoinPredicateInfo(HiveJoin j) { + public static JoinPredicateInfo constructJoinPredicateInfo(Join j) { return constructJoinPredicateInfo(j, j.getCondition()); } - public static JoinPredicateInfo 
constructJoinPredicateInfo(HiveJoin j, RexNode predicate) { + public static JoinPredicateInfo constructJoinPredicateInfo(Join j, RexNode predicate) { JoinPredicateInfo jpi = null; JoinLeafPredicateInfo jlpi = null; List equiLPIList = new ArrayList(); @@ -432,6 +439,16 @@ .copyOf(projsFromRightPartOfJoinKeysInJoinSchema); } + public List getJoinKeyExprs(int input) { + if (input == 0) { + return this.joinKeyExprsFromLeft; + } + if (input == 1) { + return this.joinKeyExprsFromRight; + } + return null; + } + public List getJoinKeyExprsFromLeft() { return this.joinKeyExprsFromLeft; } @@ -461,7 +478,7 @@ return this.projsFromRightPartOfJoinKeysInJoinSchema; } - private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(HiveJoin j, RexNode pe) { + private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(Join j, RexNode pe) { JoinLeafPredicateInfo jlpi = null; List filterNulls = new ArrayList(); List joinKeyExprsFromLeft = new ArrayList(); @@ -561,6 +578,107 @@ return deterministic; } + public static ImmutableMap getColInfoMap(List hiveCols, + int startIndx) { + Builder bldr = ImmutableMap. builder(); + + int indx = startIndx; + for (T ci : hiveCols) { + bldr.put(indx, ci); + indx++; + } + + return bldr.build(); + } + + public static ImmutableMap shiftVColsMap(Map hiveVCols, + int shift) { + Builder bldr = ImmutableMap. builder(); + + for (Integer pos : hiveVCols.keySet()) { + bldr.put(shift + pos, hiveVCols.get(pos)); + } + + return bldr.build(); + } + + public static ImmutableMap getVColsMap(List hiveVCols, + int startIndx) { + Builder bldr = ImmutableMap. builder(); + + int indx = startIndx; + for (VirtualColumn vc : hiveVCols) { + bldr.put(indx, vc); + indx++; + } + + return bldr.build(); + } + + public static ImmutableMap getColNameIndxMap(List tableFields) { + Builder bldr = ImmutableMap. builder(); + + int indx = 0; + for (FieldSchema fs : tableFields) { + bldr.put(fs.getName(), indx); + indx++; + } + + return bldr.build(); + } + + public static ImmutableMap getRowColNameIndxMap(List rowFields) { + Builder bldr = ImmutableMap. builder(); + + int indx = 0; + for (RelDataTypeField rdt : rowFields) { + bldr.put(rdt.getName(), indx); + indx++; + } + + return bldr.build(); + } + + public static ImmutableList getInputRef(List inputRefs, RelNode inputRel) { + ImmutableList.Builder bldr = ImmutableList. 
builder(); + for (int i : inputRefs) { + bldr.add(new RexInputRef(i, (RelDataType) inputRel.getRowType().getFieldList().get(i).getType())); + } + return bldr.build(); + } + + public static ExprNodeDesc getExprNode(Integer inputRefIndx, RelNode inputRel, + ExprNodeConverter exprConv) { + ExprNodeDesc exprNode = null; + RexNode rexInputRef = new RexInputRef(inputRefIndx, (RelDataType) inputRel.getRowType() + .getFieldList().get(inputRefIndx).getType()); + exprNode = rexInputRef.accept(exprConv); + + return exprNode; + } + + public static List getExprNodes(List inputRefs, RelNode inputRel, + String inputTabAlias) { + List exprNodes = new ArrayList(); + List rexInputRefs = getInputRef(inputRefs, inputRel); + // TODO: Change ExprNodeConverter to be independent of Partition Expr + ExprNodeConverter exprConv = new ExprNodeConverter(inputTabAlias, inputRel.getRowType(), false, inputRel.getCluster().getTypeFactory()); + for (RexNode iRef : rexInputRefs) { + exprNodes.add(iRef.accept(exprConv)); + } + return exprNodes; + } + + public static List getFieldNames(List inputRefs, RelNode inputRel) { + List fieldNames = new ArrayList(); + List schemaNames = inputRel.getRowType().getFieldNames(); + for (Integer iRef : inputRefs) { + fieldNames.add(schemaNames.get(iRef)); + } + + return fieldNames; + } + /** * Walks over an expression and determines whether it is constant. */ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java (revision 0) @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import java.util.List; + +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTrait; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.RelDistributionTraitDef; +import org.apache.calcite.util.mapping.Mappings.TargetMapping; + +public class HiveRelDistribution implements RelDistribution { + + List keys; + RelDistribution.Type type; + + public HiveRelDistribution(Type type, List keys) { + this.type = type; + this.keys = keys; + } + + @Override + public RelTraitDef getTraitDef() { + return RelDistributionTraitDef.INSTANCE; + } + + @Override + public void register(RelOptPlanner planner) { + + } + + @Override + public boolean satisfies(RelTrait trait) { + if (trait == this) { + return true; + } + switch (((RelDistribution)trait).getType()) { + case HASH_DISTRIBUTED : + return this.getKeys().equals(((RelDistribution)trait).getKeys()); + default: + throw new RuntimeException("Other distributions are not used yet."); + } + } + + @Override + public RelDistribution apply(TargetMapping mapping) { + if (keys.isEmpty()) { + return this; + } + return new HiveRelDistribution(type, keys); + } + + @Override + public List getKeys() { + return keys; + } + + @Override + public Type getType() { + return type; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy) @@ -531,14 +531,15 @@ Operator child = op.getChildOperators().get(0); - List childCols; + List childCols = null; if (child instanceof CommonJoinOperator) { - childCols = cppCtx.getJoinPrunedColLists().get(child) + childCols = cppCtx.getJoinPrunedColLists().get(child) == null + ? 
null : cppCtx.getJoinPrunedColLists().get(child) .get((byte) conf.getTag()); } else { childCols = cppCtx.getPrunedColList(child); + } - } List valCols = conf.getValueCols(); List valColNames = conf.getOutputValueColumnNames(); @@ -749,6 +750,7 @@ conf.setOutputColumnNames(newOutputColumnNames); handleChildren(op, cols, cppCtx); } + return null; } @@ -971,12 +973,12 @@ .getChildOperators(); LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs()); + List childColLists = cppCtx.genColLists(op); if (childColLists == null) { return; } - Map> prunedColLists = new HashMap>(); for (byte tag : conf.getTagOrder()) { prunedColLists.put(tag, new ArrayList()); @@ -1076,6 +1078,7 @@ } LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs()); + op.setColumnExprMap(newColExprMap); conf.setOutputColumnNames(outputCols); op.getSchema().setSignature(rs); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy) @@ -142,7 +142,9 @@ if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) { transformations.add(new ReduceSinkDeDuplication()); } + if(!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { transformations.add(new NonBlockingOpDeDupProc()); + } if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)) { transformations.add(new IdentityProjectRemover()); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java (working copy) @@ -242,4 +242,4 @@ return null; } } -} +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (working copy) @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -444,4 +445,42 @@ // If the child is also decimal, no cast is needed (we hope - can target type be narrower?). return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory(childTi); } + + /** + * Build ExprNodeColumnDesc for the projections in the input operator from + * sartpos to endpos(both included). Operator must have an associated + * colExprMap. + * + * @param inputOp + * Input Hive Operator + * @param startPos + * starting position in the input operator schema; must be >=0 and <= + * endPos + * @param endPos + * end position in the input operator schema; must be >=0. 
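 * Illustrative usage (assumed operator schema of key, value, ds): genExprNodeDesc(sel, 0, 1, true, true)
 * returns column descriptors for key and value with an empty table alias and with the
 * virtual-column flag cleared.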
+   * @return List of ExprNodeDesc
+   */
+  public static ArrayList genExprNodeDesc(Operator inputOp, int startPos, int endPos,
+      boolean addEmptyTabAlias, boolean setColToNonVirtual) {
+    ArrayList exprColLst = new ArrayList();
+    List colInfoLst = inputOp.getSchema().getSignature();
+
+    String tabAlias;
+    boolean vc;
+    ColumnInfo ci;
+    for (int i = startPos; i <= endPos; i++) {
+      ci = colInfoLst.get(i);
+      tabAlias = ci.getTabAlias();
+      if (addEmptyTabAlias) {
+        tabAlias = "";
+      }
+      vc = ci.getIsVirtualCol();
+      if (setColToNonVirtual) {
+        vc = false;
+      }
+      exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
+    }
+
+    return exprColLst;
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (revision 1674187)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (working copy)
@@ -110,6 +110,13 @@
   public JoinDesc(final Map> exprs,
       List outputColumnNames, final boolean noOuterJoin,
+      final JoinCondDesc[] conds, ExprNodeDesc[][] joinKeys) {
+    this (exprs, outputColumnNames, noOuterJoin, conds,
+        new HashMap>(), joinKeys);
+  }
+
+  public JoinDesc(final Map> exprs,
+      List outputColumnNames, final boolean noOuterJoin,
       final JoinCondDesc[] conds, final Map> filters, ExprNodeDesc[][] joinKeys) {
     this.exprs = exprs;
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1674187)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -229,9 +229,9 @@
   private HashMap opToPartPruner;
   private HashMap opToPartList;
-  private HashMap> topOps;
-  private final HashMap> topSelOps;
-  private final LinkedHashMap, OpParseContext> opParseCtx;
+  protected HashMap> topOps;
+  private HashMap> topSelOps;
+  protected LinkedHashMap, OpParseContext> opParseCtx;
   private List loadTableWork;
   private List loadFileWork;
   private final Map joinContext;
@@ -258,7 +258,7 @@
   private CreateViewDesc createVwDesc;
   private ArrayList viewsExpanded;
   private ASTNode viewSelect;
-  private final UnparseTranslator unparseTranslator;
+  protected final UnparseTranslator unparseTranslator;
   private final GlobalLimitCtx globalLimitCtx;

   // prefix for column names auto generated by hive
@@ -478,7 +478,7 @@
           wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
         continue;
       }
-      wFnSpec.setAlias("_wcol" + wColIdx);
+      wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx);
       spec.addWindowFunction(wFnSpec);
       qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
     }
@@ -3448,7 +3448,7 @@
     return ret;
   }

-  private int setBit(int bitmap, int bitIdx) {
+  public static int setBit(int bitmap, int bitIdx) {
     return bitmap | (1 << bitIdx);
   }

@@ -3984,10 +3984,10 @@
   /**
   * Class to store GenericUDAF related information.
   */
-  static class GenericUDAFInfo {
-    ArrayList convertedParameters;
-    GenericUDAFEvaluator genericUDAFEvaluator;
-    TypeInfo returnType;
+  public static class GenericUDAFInfo {
+    public ArrayList convertedParameters;
+    public GenericUDAFEvaluator genericUDAFEvaluator;
+    public TypeInfo returnType;
   }

   /**
@@ -4028,7 +4028,7 @@
   * Returns the GenericUDAFEvaluator for the aggregation. This is called once
   * for each GroupBy aggregation.
*/ - static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, + public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, ArrayList aggParameters, ASTNode aggTree, boolean isDistinct, boolean isAllColumns) throws SemanticException { @@ -4058,7 +4058,7 @@ * @throws SemanticException * when the UDAF is not found or has problems. */ - static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, + public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, GenericUDAFEvaluator.Mode emode, ArrayList aggParameters) throws SemanticException { @@ -4087,7 +4087,7 @@ return r; } - static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( + public static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( GroupByDesc.Mode mode, boolean isDistinct) { switch (mode) { case COMPLETE: @@ -4130,7 +4130,7 @@ * @return the ExprNodeDesc of the constant parameter if the given internalName represents * a constant parameter; otherwise, return null */ - private ExprNodeDesc isConstantParameterInAggregationParameters(String internalName, + public static ExprNodeDesc isConstantParameterInAggregationParameters(String internalName, List reduceValues) { // only the pattern of "VALUE._col([0-9]+)" should be handled. @@ -5577,7 +5577,7 @@ return false; } - private void checkExpressionsForGroupingSet(List grpByExprs, + void checkExpressionsForGroupingSet(List grpByExprs, List distinctGrpByExprs, Map aggregationTrees, RowResolver inputRowResolver) throws SemanticException { @@ -6131,7 +6131,7 @@ } @SuppressWarnings("nls") - private Operator genFileSinkPlan(String dest, QB qb, Operator input) + protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRowResolver(); @@ -9234,7 +9234,7 @@ return equalsExpr; } - private String getAliasId(String alias, QB qb) { + protected String getAliasId(String alias, QB qb) { return (qb.getId() == null ? 
alias : qb.getId() + ":" + alias).toLowerCase(); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (working copy) @@ -49,8 +49,8 @@ import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelCollations; -import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; @@ -58,8 +58,10 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -116,12 +118,14 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; @@ -135,8 +139,11 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; @@ -175,6 +182,7 @@ import com.google.common.collect.Lists; public class CalcitePlanner extends SemanticAnalyzer { + private final AtomicInteger 
noColsMissingStats = new AtomicInteger(0); private List topLevelFieldSchema; private SemanticException semanticException; @@ -218,13 +226,16 @@ if (cboCtx.type == PreCboCtx.Type.CTAS) { queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query } - runCBO = canHandleAstForCbo(queryForCbo, getQB(), cboCtx); + runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx); if (runCBO) { disableJoinMerge = true; boolean reAnalyzeAST = false; try { + if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + sinkOp = getOptimizedHiveOPDag(); + } else { // 1. Gen Optimized AST ASTNode newAST = getOptimizedAST(); @@ -252,6 +263,7 @@ LOG.info("CBO Succeeded; optimized logical plan."); this.ctx.setCboInfo("Plan optimized by CBO."); LOG.debug(newAST.dump()); + } } catch (Exception e) { boolean isMissingStats = noColsMissingStats.get() > 0; if (isMissingStats) { @@ -324,7 +336,7 @@ * If top level QB is query then everything below it must also be * Query. */ - boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { + boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) { int root = ast.getToken().getType(); boolean needToLogMessage = STATIC_LOG.isInfoEnabled(); boolean isSupportedRoot = root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN @@ -598,6 +610,57 @@ return optiqOptimizedAST; } + /** + * Get Optimized Hive Operator DAG for the given QB tree in the semAnalyzer. + * + * @return Optimized Hive operator tree + * @throws SemanticException + */ + Operator getOptimizedHiveOPDag() throws SemanticException { + RelNode optimizedOptiqPlan = null; + CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions); + + try { + optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks + .newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); + } catch (Exception e) { + rethrowCalciteException(e); + throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); + } + + RelNode modifiedOptimizedOptiqPlan = introduceProjectIfNeeded(optimizedOptiqPlan); + + Operator hiveRoot = new HiveOpConverter(this, conf, unparseTranslator, topOps, + conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(modifiedOptimizedOptiqPlan); + RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB()); + opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR)); + return genFileSinkPlan(getQB().getParseInfo().getClauseNames().iterator().next(), getQB(), hiveRoot); + } + + private RelNode introduceProjectIfNeeded(RelNode optimizedOptiqPlan) + throws CalciteSemanticException { + RelNode parent = null; + RelNode input = optimizedOptiqPlan; + RelNode newRoot = optimizedOptiqPlan; + + while (!(input instanceof Project) && (input instanceof Sort)) { + parent = input; + input = input.getInput(0); + } + + if (!(input instanceof Project)) { + HiveProject hpRel = HiveProject.create(input, + HiveCalciteUtil.getProjsFromBelowAsInputRef(input), input.getRowType().getFieldNames()); + if (input == optimizedOptiqPlan) { + newRoot = hpRel; + } else { + parent.replaceInput(0, hpRel); + } + } + + return newRoot; + } + /*** * Unwraps Calcite Invocation exceptions coming meta data provider chain and * obtains the real cause. @@ -674,6 +737,24 @@ || t instanceof UndeclaredThrowableException; } + private RowResolver genRowResolver(Operator op, QB qb) { + RowResolver rr = new RowResolver(); + String subqAlias = (qb.getAliases().size() == 1 && qb.getSubqAliases().size() == 1) ? 
qb + .getAliases().get(0) : null; + + for (ColumnInfo ci : op.getSchema().getSignature()) { + try { + rr.putWithCheck((subqAlias != null) ? subqAlias : ci.getTabAlias(), + ci.getAlias() != null ? ci.getAlias() : ci.getInternalName(), ci.getInternalName(), + new ColumnInfo(ci)); + } catch (SemanticException e) { + throw new RuntimeException(e); + } + } + + return rr; + } + /** * Code responsible for Calcite plan generation and optimization. */ @@ -700,7 +781,13 @@ /* * recreate cluster, so that it picks up the additional traitDef */ - RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(); + final Double maxSplitSize = (double) HiveConf.getLongVar( + conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE); + final Double maxMemory = (double) HiveConf.getLongVar( + conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory); + HiveConfigContext confContext = new HiveConfigContext(algorithmsConf); + RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext); final RelOptQuery query = new RelOptQuery(planner); final RexBuilder rexBuilder = cluster.getRexBuilder(); cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); @@ -719,13 +806,16 @@ throw new RuntimeException(e); } + // Create MD provider + HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); + // 2. Apply Pre Join Order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, - HiveDefaultRelMetadataProvider.INSTANCE); + mdProvider.getMetadataProvider()); // 3. Appy Join Order Optimizations using Hep Planner (MST Algorithm) List list = Lists.newArrayList(); - list.add(HiveDefaultRelMetadataProvider.INSTANCE); + list.add(mdProvider.getMetadataProvider()); RelTraitSet desiredTraits = cluster .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY); @@ -758,6 +848,18 @@ calciteOptimizedPlan = hepPlanner.findBestExp(); + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + // run rules to aid in translation from Optiq tree -> Hive tree + hepPgm = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) + .addRuleInstance(new HiveInsertExchange4JoinRule()).build(); + hepPlanner = new HepPlanner(hepPgm); + + hepPlanner.registerMetadataProviders(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + hepPlanner.setRoot(calciteOptimizedPlan); + calciteOptimizedPlan = hepPlanner.findBestExp(); + } + if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { LOG.debug("CBO Planning details:\n"); LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan)); @@ -789,7 +891,12 @@ basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 2. PPD + // 2. Add not null filters + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); + } + + // 3. PPD basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE, ReduceExpressionsRule.FILTER_INSTANCE, @@ -802,19 +909,19 @@ HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - // 3. Transitive inference & Partition Pruning + // 4. 
Transitive inference & Partition Pruning basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); - // 4. Projection Pruning + // 5. Projection Pruning RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 5. Rerun PPD through Project as column pruning would have introduced DT + // 6. Rerun PPD through Project as column pruning would have introduced DT // above scans basePlan = hepPlan(basePlan, true, mdProvider, new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, @@ -1186,7 +1293,7 @@ } // 2. Get Table Metadata - Table tab = qb.getMetaData().getSrcForAlias(tableAlias); + Table tabMetaData = qb.getMetaData().getSrcForAlias(tableAlias); // 3. Get Table Logical Schema (Row Type) // NOTE: Table logical schema = Non Partition Cols + Partition Cols + @@ -1194,7 +1301,7 @@ // 3.1 Add Column info for non partion cols (Object Inspector fields) @SuppressWarnings("deprecation") - StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer() + StructObjectInspector rowObjectInspector = (StructObjectInspector) tabMetaData.getDeserializer() .getObjectInspector(); List fields = rowObjectInspector.getAllStructFieldRefs(); ColumnInfo colInfo; @@ -1216,7 +1323,7 @@ ArrayList partitionColumns = new ArrayList(); // 3.2 Add column info corresponding to partition columns - for (FieldSchema part_col : tab.getPartCols()) { + for (FieldSchema part_col : tabMetaData.getPartCols()) { colName = part_col.getName(); colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true); @@ -1226,6 +1333,7 @@ } // 3.3 Add column info corresponding to virtual columns + List virtualCols = new ArrayList(); Iterator vcs = VirtualColumn.getRegistry(conf).iterator(); while (vcs.hasNext()) { VirtualColumn vc = vcs.next(); @@ -1233,24 +1341,26 @@ vc.getIsHidden()); rr.put(tableAlias, vc.getName(), colInfo); cInfoLst.add(colInfo); + virtualCols.add(vc); } // 3.4 Build row type from field RelDataType rowType = TypeConverter.getType(cluster, rr, null); // 4. Build RelOptAbstractTable - String fullyQualifiedTabName = tab.getDbName(); - if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) - fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName(); - else - fullyQualifiedTabName = tab.getTableName(); + String fullyQualifiedTabName = tabMetaData.getDbName(); + if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) { + fullyQualifiedTabName = fullyQualifiedTabName + "." + tabMetaData.getTableName(); + } + else { + fullyQualifiedTabName = tabMetaData.getTableName(); + } RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, - tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache, - noColsMissingStats); + rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, + partitionCache, noColsMissingStats, getAliasId(tableAlias, qb)); // 5. 
Build Hive Table Scan Rel - tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, - rowType); + tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias); // 6. Add Schema(RR) to RelNode-Schema map ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, @@ -1768,23 +1878,53 @@ qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr)); } } + List grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName); HashMap aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true : false; + final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty() + || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); + + // 2. Sanity check + if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) + && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg()); + } + if (cubeRollupGrpSetPresent) { + if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) { + throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg()); + } + + if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { + checkExpressionsForGroupingSet(grpByAstExprs, qb.getParseInfo() + .getDistinctFuncExprsForClause(detsClauseName), aggregationTrees, + this.relToHiveRR.get(srcRel)); + + if (qbp.getDestGroupingSets().size() > conf + .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY)) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + qbp.getDestGroupingSets().size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg)); + } + } + } + + if (hasGrpByAstExprs || hasAggregationTrees) { ArrayList gbExprNDescLst = new ArrayList(); ArrayList outputColumnNames = new ArrayList(); - // 2. Input, Output Row Resolvers + // 3. Input, Output Row Resolvers RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); RowResolver groupByOutputRowResolver = new RowResolver(); groupByOutputRowResolver.setIsExprResolver(true); if (hasGrpByAstExprs) { - // 3. Construct GB Keys (ExprNode) + // 4. Construct GB Keys (ExprNode) for (int i = 0; i < grpByAstExprs.size(); ++i) { ASTNode grpbyExpr = grpByAstExprs.get(i); Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( @@ -1799,12 +1939,10 @@ } } - // 4. GroupingSets, Cube, Rollup + // 5. GroupingSets, Cube, Rollup int groupingColsSize = gbExprNDescLst.size(); List groupingSets = null; - if (!qbp.getDestRollups().isEmpty() - || !qbp.getDestGroupingSets().isEmpty() - || !qbp.getDestCubes().isEmpty()) { + if (cubeRollupGrpSetPresent) { if (qbp.getDestRollups().contains(detsClauseName)) { groupingSets = getGroupingSetsForRollup(grpByAstExprs.size()); } else if (qbp.getDestCubes().contains(detsClauseName)) { @@ -1827,18 +1965,18 @@ } } - // 5. Construct aggregation function Info + // 6. 
Construct aggregation function Info ArrayList aggregations = new ArrayList(); if (hasAggregationTrees) { assert (aggregationTrees != null); for (ASTNode value : aggregationTrees.values()) { - // 5.1 Determine type of UDAF + // 6.1 Determine type of UDAF // This is the GenericUDAF name String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText()); boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; - // 5.2 Convert UDAF Params to ExprNodeDesc + // 6.2 Convert UDAF Params to ExprNodeDesc ArrayList aggParameters = new ArrayList(); for (int i = 1; i < value.getChildCount(); i++) { ASTNode paraExpr = (ASTNode) value.getChild(i); @@ -1862,7 +2000,7 @@ } } - // 6. If GroupingSets, Cube, Rollup were used, we account grouping__id + // 7. If GroupingSets, Cube, Rollup were used, we account grouping__id if(groupingSets != null && !groupingSets.isEmpty()) { String field = getColumnInternalName(groupingColsSize + aggregations.size()); outputColumnNames.add(field); @@ -1874,7 +2012,7 @@ true)); } - // 7. We create the group_by operator + // 8. We create the group_by operator gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel); relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); @@ -2250,15 +2388,27 @@ } } - return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel); + return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel, windowExpressions); } private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, RelNode srcRel) throws CalciteSemanticException { + return genSelectRelNode(calciteColLst, out_rwsch, srcRel, null); + } + + private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, + RelNode srcRel, List windowExpressions) throws CalciteSemanticException { // 1. Build Column Names Set colNamesSet = new HashSet(); List cInfoLst = out_rwsch.getRowSchema().getSignature(); ArrayList columnNames = new ArrayList(); + Map windowToAlias = null; + if (windowExpressions != null ) { + windowToAlias = new HashMap(); + for (WindowExpressionSpec wes : windowExpressions) { + windowToAlias.put(wes.getExpression().toStringTree().toLowerCase(), wes.getAlias()); + } + } String[] qualifiedColNames; String tmpColAlias; for (int i = 0; i < calciteColLst.size(); i++) { @@ -2276,8 +2426,11 @@ * the names so we don't run into this issue when converting back to * Hive AST. */ - if (tmpColAlias.startsWith("_c")) + if (tmpColAlias.startsWith("_c")) { tmpColAlias = "_o_" + tmpColAlias; + } else if (windowToAlias != null && windowToAlias.containsKey(tmpColAlias)) { + tmpColAlias = windowToAlias.get(tmpColAlias); + } int suffix = 1; while (colNamesSet.contains(tmpColAlias)) { tmpColAlias = qualifiedColNames[1] + suffix; @@ -2769,4 +2922,5 @@ return tabAliases; } } + } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (revision 1674187) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (working copy) @@ -38,7 +38,7 @@ * SemanticAnalyzer.saveViewDefinition() calls TokenRewriteStream.toString(). 
 *
 */
-class UnparseTranslator {
+public class UnparseTranslator {
   // key is token start index
   private final NavigableMap translations;
   private final List copyTranslations;
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (revision 1674187)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (working copy)
@@ -22,6 +22,7 @@
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Set;

 /**
@@ -102,6 +103,14 @@
     return tableNames;
   }

+  public List getColumnNames() {
+    List columnNames = new ArrayList();
+    for (ColumnInfo var : this.signature) {
+      columnNames.add(var.getInternalName());
+    }
+    return columnNames;
+  }
+
   @Override
   public boolean equals(Object obj) {
     if (!(obj instanceof RowSchema) || (obj == null)) {
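
Illustrative note (not part of the patch): the new HiveRelDistribution trait added above only handles hash distributions, and its satisfies() check treats two hash distributions as compatible when their key lists are equal. The standalone sketch below models just that comparison; the class and method names are hypothetical stand-ins, not Calcite's API.

import java.util.Arrays;
import java.util.List;

// Simplified stand-in for the HASH_DISTRIBUTED branch of
// HiveRelDistribution.satisfies(): compatibility is key-list equality.
public class HashDistributionCheckSketch {

  // Hypothetical helper: a delivered hash distribution satisfies a required
  // one only when both shuffle on exactly the same key positions.
  static boolean satisfiesHash(List<Integer> requiredKeys, List<Integer> deliveredKeys) {
    return deliveredKeys.equals(requiredKeys);
  }

  public static void main(String[] args) {
    List<Integer> joinKeys = Arrays.asList(0, 2);   // shuffle required on columns 0 and 2
    List<Integer> childDist = Arrays.asList(0, 2);  // child already hashed on 0, 2
    List<Integer> otherDist = Arrays.asList(1);     // hashed on column 1 only

    System.out.println(satisfiesHash(joinKeys, childDist)); // true  -> no exchange needed
    System.out.println(satisfiesHash(joinKeys, otherDist)); // false -> re-shuffle required
  }
}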
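
Illustrative note (not part of the patch): the new ExprNodeDescUtils.genExprNodeDesc() walks the input operator's schema from startPos to endPos inclusive and emits one column reference per position. The sketch below models those semantics with a hypothetical ColumnRef type instead of Hive's ColumnInfo/ExprNodeColumnDesc; the flag behavior shown (blanking the table alias, forcing the virtual-column bit off) follows the patch.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Self-contained model of the range walk in genExprNodeDesc(): build one
// column reference per schema position in [startPos, endPos], both inclusive.
public class ColumnRefRangeSketch {

  // Hypothetical stand-in for Hive's ColumnInfo / ExprNodeColumnDesc.
  static class ColumnRef {
    final String name;
    final String tabAlias;
    final boolean virtualCol;

    ColumnRef(String name, String tabAlias, boolean virtualCol) {
      this.name = name;
      this.tabAlias = tabAlias;
      this.virtualCol = virtualCol;
    }

    @Override
    public String toString() {
      return tabAlias + "." + name + (virtualCol ? " (virtual)" : "");
    }
  }

  static List<ColumnRef> columnRefs(List<ColumnRef> schema, int startPos, int endPos,
      boolean addEmptyTabAlias, boolean setColToNonVirtual) {
    List<ColumnRef> refs = new ArrayList<ColumnRef>();
    for (int i = startPos; i <= endPos; i++) {             // inclusive upper bound
      ColumnRef ci = schema.get(i);
      String alias = addEmptyTabAlias ? "" : ci.tabAlias;  // optionally blank the alias
      boolean vc = setColToNonVirtual ? false : ci.virtualCol;
      refs.add(new ColumnRef(ci.name, alias, vc));
    }
    return refs;
  }

  public static void main(String[] args) {
    List<ColumnRef> schema = Arrays.asList(
        new ColumnRef("_col0", "t1", false),
        new ColumnRef("_col1", "t1", false),
        new ColumnRef("BLOCK__OFFSET__INSIDE__FILE", "t1", true));
    // Take positions 1..2 inclusive, drop table aliases, demote virtual columns.
    System.out.println(columnRefs(schema, 1, 2, true, true));
  }
}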
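
Illustrative note (not part of the patch): CalcitePlanner.introduceProjectIfNeeded() in the patch walks from the plan root through any top-level Sort operators and, if the node underneath is not a Project, wraps it in an identity Project, replacing the root when the root itself was that node. A toy model of that control flow follows; Node and the string kinds are made up for the example and are not Calcite RelNodes.

// Toy model of the "ensure a Project sits under the top-level Sorts" step.
public class EnsureTopProjectSketch {

  static class Node {
    String kind;
    Node input;

    Node(String kind, Node input) {
      this.kind = kind;
      this.input = input;
    }
  }

  static Node introduceProjectIfNeeded(Node root) {
    Node parent = null;
    Node input = root;
    // Descend only through Sort nodes, stopping early if a Project is found.
    while (!"Project".equals(input.kind) && "Sort".equals(input.kind)) {
      parent = input;
      input = input.input;
    }
    if (!"Project".equals(input.kind)) {
      Node project = new Node("Project", input);  // identity projection over 'input'
      if (input == root) {
        return project;                           // the projection becomes the new root
      }
      parent.input = project;                     // splice in under the lowest Sort
    }
    return root;
  }

  public static void main(String[] args) {
    Node plan = new Node("Sort", new Node("Join", new Node("Scan", null)));
    Node fixed = introduceProjectIfNeeded(plan);
    System.out.println(fixed.kind + " -> " + fixed.input.kind + " -> " + fixed.input.input.kind);
    // prints: Sort -> Project -> Join
  }
}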