Index: pom.xml
===================================================================
--- pom.xml (revision 1673283)
+++ pom.xml (working copy)
@@ -100,7 +100,7 @@
<antlr.version>3.4</antlr.version>
<avro.version>1.7.5</avro.version>
<bonecp.version>0.8.0.RELEASE</bonecp.version>
- <calcite.version>1.1.0-incubating</calcite.version>
+ <calcite.version>1.2.0-incubating-SNAPSHOT</calcite.version>
<datanucleus-api-jdo.version>3.2.6</datanucleus-api-jdo.version>
<datanucleus-core.version>3.2.10</datanucleus-core.version>
<datanucleus-rdbms.version>3.2.9</datanucleus-rdbms.version>
Index: metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
===================================================================
--- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (revision 1673283)
+++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (working copy)
@@ -5487,7 +5487,7 @@
ex = e;
throw newMetaException(e);
} finally {
- endFunction("get_function", func != null, ex);
+ endFunction("get_database", func != null, ex);
}
return func;
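
Note on the hunk above: startFunction/endFunction bracket every HMSHandler call, and the string label passed to endFunction is only the name under which the call is recorded in the audit log and per-call metrics. A minimal sketch of that bracketing pattern, with a simplified body (assumed shape for illustration, not copied from HiveMetaStore):

// Sketch only: startFunction/endFunction bracketing used by HMSHandler methods.
// The label argument is the name the call is logged and counted under.
public Function get_function(String dbName, String funcName)
    throws MetaException, NoSuchObjectException, TException {
  startFunction("get_function", ": " + dbName + "." + funcName);
  Function func = null;
  Exception ex = null;
  try {
    func = getMS().getFunction(dbName, funcName);
  } catch (Exception e) {
    ex = e;
    throw newMetaException(e);
  } finally {
    endFunction("get_function", func != null, ex); // label recorded in audit log/metrics
  }
  return func;
}
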
Index: metastore/bin/.gitignore
===================================================================
--- metastore/bin/.gitignore (revision 1673283)
+++ metastore/bin/.gitignore (working copy)
@@ -1 +1 @@
-# Dummy file to make Git recognize this empty directory
+/src/
Index: itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
===================================================================
--- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (revision 1673283)
+++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (working copy)
@@ -266,7 +266,7 @@
// Set some conf parameters
String hiveConf = "hive.cli.print.header=true;hive.server2.async.exec.shutdown.timeout=20;"
- + "hive.server2.async.exec.threads=30;hive.server2.thrift.max.worker.threads=15";
+ + "hive.server2.async.exec.threads=30;hive.server2.thrift.http.max.worker.threads=15";
// Set some conf vars
String hiveVar = "stab=salesTable;icol=customerID";
String jdbcUri = miniHS2.getJdbcURL() + "?" + hiveConf + "#" + hiveVar;
@@ -284,7 +284,7 @@
verifyConfProperty(stmt, "hive.cli.print.header", "true");
verifyConfProperty(stmt, "hive.server2.async.exec.shutdown.timeout", "20");
verifyConfProperty(stmt, "hive.server2.async.exec.threads", "30");
- verifyConfProperty(stmt, "hive.server2.thrift.max.worker.threads",
+ verifyConfProperty(stmt, "hive.server2.thrift.http.max.worker.threads",
"15");
verifyConfProperty(stmt, "stab", "salesTable");
verifyConfProperty(stmt, "icol", "customerID");
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1673283)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -703,6 +703,9 @@
// CBO related
HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."),
+ HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", false, "Flag to control calcite plan to hive operator conversion"),
+ EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", true, "Flag to control enabling the extended cost model based on "
+ + "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."),
// hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
// need to remove by hive .13. Also, do not change default (see SMB operator)
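
The two ConfVars added above are plain boolean flags; planner code would typically consult them through HiveConf's getBoolVar accessors. A minimal sketch of that usage (the setBoolVar call is only there to make the example self-contained; the patch defaults hive.cbo.returnpath.hiveop to false):

import org.apache.hadoop.hive.conf.HiveConf;

public class CboFlagsExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Flip the return-path flag for this illustration.
    conf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP, true);

    boolean returnPath = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP);
    boolean extendedCost = HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXTENDED_COST_MODEL);
    System.out.println("returnpath.hiveop=" + returnPath + ", costmodel.extended=" + extendedCost);
  }
}
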
Index: ql/.gitignore
===================================================================
--- ql/.gitignore (revision 1673283)
+++ ql/.gitignore (working copy)
@@ -1 +1,2 @@
dependency-reduced-pom.xml
+/bin/
Index: ql/src/test/results/clientpositive/spark/join33.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/join33.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/join33.q.out (working copy)
@@ -113,16 +113,16 @@
Map 1
Map Operator Tree:
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
@@ -136,9 +136,12 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -146,11 +149,13 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -160,26 +165,23 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
+ name: default.srcpart
+ name: default.srcpart
Truncated Path -> Alias:
- /src [y]
+ /srcpart/ds=2008-04-08/hr=11 [z]
Map 3
Map Operator Tree:
TableScan
@@ -188,7 +190,7 @@
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value is not null and key is not null) (type: boolean)
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -197,7 +199,7 @@
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Local Work:
Map Reduce Local Work
@@ -258,24 +260,24 @@
Map 2
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
input vertices:
1 Map 3
Position of Big Table: 0
@@ -286,13 +288,13 @@
keys:
0 _col0 (type: string)
1 _col3 (type: string)
- outputColumnNames: _col1, _col2, _col5
+ outputColumnNames: _col0, _col4, _col5
input vertices:
0 Map 1
Position of Big Table: 1
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -328,12 +330,9 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -341,13 +340,11 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -357,23 +354,26 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [z]
+ /src [y]
Stage: Stage-0
Move Operator
@@ -422,8 +422,8 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
Index: ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out (working copy)
@@ -346,28 +346,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -767,7 +767,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -775,7 +775,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1296,28 +1296,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1629,21 +1629,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -1651,7 +1651,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1998,21 +1998,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col2, 1, _col2
name: lag
window function: GenericUDAFLagEvaluator
@@ -2020,7 +2020,7 @@
isPivotResult: true
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2954,7 +2954,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -2962,7 +2962,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3277,28 +3277,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3601,28 +3601,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3995,28 +3995,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4349,20 +4349,20 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col5
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol1
+ alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4862,33 +4862,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -4896,7 +4896,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -5450,14 +5450,14 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -5898,28 +5898,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -5969,15 +5969,15 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(5)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint)
- outputColumnNames: _col1, _col2, _col5, _wcol0
+ expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col5, sum_window_0
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
@@ -5985,7 +5985,7 @@
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _wcol0 (type: bigint), _col5 (type: int)
+ value expressions: sum_window_0 (type: bigint), _col5 (type: int)
auto parallelism: false
Reducer 5
Needs Tagging: false
@@ -6008,35 +6008,35 @@
raw input shape:
window functions:
window function definition
- alias: _wcol1
+ alias: rank_window_1
arguments: _col3, _col2
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: dense_rank_window_2
arguments: _col3, _col2
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol3
+ alias: cume_dist_window_3
arguments: _col3, _col2
name: cume_dist
window function: GenericUDAFCumeDistEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol4
+ alias: first_value_window_4
arguments: _col6, true
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int)
+ expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -6626,28 +6626,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -7076,28 +7076,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -7487,28 +7487,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -7949,28 +7949,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -8420,28 +8420,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -8850,28 +8850,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
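
The golden-file churn in this file and in the following ptf.q.out diff is one mechanical change: internal windowing-column aliases move from positional _wcolN names to descriptive names built from the window function and its position in the windowing spec. A hypothetical helper, for illustration only (not Hive code), showing the pattern visible in the new plans:

// Illustration of the alias pattern in the updated plans:
// rank_window_0, dense_rank_window_1, sum_window_2, lag_window_2, ...
static String windowAlias(String functionName, int position) {
  return functionName.toLowerCase() + "_window_" + position;
}
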
Index: ql/src/test/results/clientpositive/spark/ptf.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/ptf.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/ptf.q.out (working copy)
@@ -93,28 +93,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -297,7 +297,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -305,7 +305,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -571,28 +571,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -748,21 +748,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -770,7 +770,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -950,21 +950,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col2, 1, _col2
name: lag
window function: GenericUDAFLagEvaluator
@@ -972,7 +972,7 @@
isPivotResult: true
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1440,7 +1440,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -1448,7 +1448,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1613,28 +1613,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1786,28 +1786,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2019,28 +2019,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2202,20 +2202,20 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col5
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol1
+ alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2414,33 +2414,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -2448,7 +2448,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2767,14 +2767,14 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2954,28 +2954,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3006,22 +3006,22 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(5)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint)
- outputColumnNames: _col1, _col2, _col5, _wcol0
+ expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col5, sum_window_0
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _wcol0 (type: bigint), _col5 (type: int)
+ value expressions: sum_window_0 (type: bigint), _col5 (type: int)
Reducer 5
Reduce Operator Tree:
Select Operator
@@ -3042,35 +3042,35 @@
raw input shape:
window functions:
window function definition
- alias: _wcol1
+ alias: rank_window_1
arguments: _col3, _col2
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: dense_rank_window_2
arguments: _col3, _col2
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol3
+ alias: cume_dist_window_3
arguments: _col3, _col2
name: cume_dist
window function: GenericUDAFCumeDistEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol4
+ alias: first_value_window_4
arguments: _col6, true
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int)
+ expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3444,28 +3444,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3710,28 +3710,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3952,28 +3952,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4231,28 +4231,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4492,28 +4492,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4740,28 +4740,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/spark/subquery_in.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/subquery_in.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/subquery_in.q.out (working copy)
@@ -327,7 +327,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -335,7 +335,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col5 (type: int)
@@ -495,7 +495,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -503,7 +503,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean)
+ predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
Index: ql/src/test/results/clientpositive/spark/ptf_streaming.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/ptf_streaming.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/ptf_streaming.q.out (working copy)
@@ -93,28 +93,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -297,7 +297,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -305,7 +305,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -615,7 +615,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -623,7 +623,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -788,28 +788,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1021,28 +1021,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1256,28 +1256,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1491,28 +1491,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1705,33 +1705,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -1739,7 +1739,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1980,28 +1980,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2246,28 +2246,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2496,28 +2496,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/join32_lessSize.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out (working copy)
@@ -121,16 +121,16 @@
Map 1
Map Operator Tree:
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
@@ -144,9 +144,12 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -154,11 +157,13 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -168,26 +173,23 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
+ name: default.srcpart
+ name: default.srcpart
Truncated Path -> Alias:
- /src [y]
+ /srcpart/ds=2008-04-08/hr=11 [z]
Map 3
Map Operator Tree:
TableScan
@@ -196,7 +198,7 @@
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value is not null and key is not null) (type: boolean)
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -205,7 +207,7 @@
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Local Work:
Map Reduce Local Work
@@ -266,24 +268,24 @@
Map 2
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
input vertices:
1 Map 3
Position of Big Table: 0
@@ -294,13 +296,13 @@
keys:
0 _col0 (type: string)
1 _col3 (type: string)
- outputColumnNames: _col1, _col2, _col5
+ outputColumnNames: _col0, _col4, _col5
input vertices:
0 Map 1
Position of Big Table: 1
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -336,12 +338,9 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -349,13 +348,11 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -365,23 +362,26 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [z]
+ /src [y]
Stage: Stage-0
Move Operator
@@ -430,8 +430,8 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
@@ -613,35 +613,34 @@
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-3 depends on stages: Stage-4
+ Stage-3 is a root stage
Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Spark
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: w
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col0 (type: string)
+ 1 _col1 (type: string)
Position of Big Table: 1
Local Work:
Map Reduce Local Work
@@ -650,7 +649,7 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src1
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -660,14 +659,14 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src1
+ name default.src
numFiles 1
- numRows 25
- rawDataSize 191
- serialization.ddl struct src1 { string key, string value}
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 216
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -680,44 +679,39 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src1
+ name default.src
numFiles 1
- numRows 25
- rawDataSize 191
- serialization.ddl struct src1 { string key, string value}
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 216
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src1
- name: default.src1
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /src1 [x]
-
- Stage: Stage-3
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /src [w]
+ Map 3
Map Operator Tree:
TableScan
- alias: w
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (value is not null and key is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
1 _col1 (type: string)
- Position of Big Table: 1
+ Position of Big Table: 0
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -725,7 +719,7 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -735,14 +729,14 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.src1
numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ numRows 25
+ rawDataSize 191
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -755,20 +749,20 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.src1
numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ numRows 25
+ rawDataSize 191
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
+ name: default.src1
+ name: default.src1
Truncated Path -> Alias:
- /src [w]
+ /src1 [x]
Map 4
Map Operator Tree:
TableScan
@@ -783,21 +777,10 @@
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 3
- Position of Big Table: 1
- Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Local Work:
Map Reduce Local Work
@@ -875,11 +858,22 @@
keys:
0 _col0 (type: string)
1 _col1 (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col1, _col4
input vertices:
1 Map 4
Position of Big Table: 0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -890,17 +884,17 @@
input vertices:
0 Map 1
Position of Big Table: 1
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
Index: ql/src/test/results/clientpositive/spark/join32.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/join32.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/join32.q.out (working copy)
@@ -113,16 +113,16 @@
Map 1
Map Operator Tree:
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
@@ -136,9 +136,12 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -146,11 +149,13 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -160,26 +165,23 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
+ name: default.srcpart
+ name: default.srcpart
Truncated Path -> Alias:
- /src [y]
+ /srcpart/ds=2008-04-08/hr=11 [z]
Map 3
Map Operator Tree:
TableScan
@@ -188,7 +190,7 @@
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value is not null and key is not null) (type: boolean)
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -197,7 +199,7 @@
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Local Work:
Map Reduce Local Work
@@ -258,24 +260,24 @@
Map 2
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
input vertices:
1 Map 3
Position of Big Table: 0
@@ -286,13 +288,13 @@
keys:
0 _col0 (type: string)
1 _col3 (type: string)
- outputColumnNames: _col1, _col2, _col5
+ outputColumnNames: _col0, _col4, _col5
input vertices:
0 Map 1
Position of Big Table: 1
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -328,12 +330,9 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -341,13 +340,11 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -357,23 +354,26 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [z]
+ /src [y]
Stage: Stage-0
Move Operator
@@ -422,8 +422,8 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
Index: ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
===================================================================
--- ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (working copy)
@@ -390,9 +390,9 @@
Stage: Stage-1
Spark
Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2)
- Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -401,81 +401,99 @@
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ predicate: (p_partkey is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: p_name is not null (type: boolean)
+ predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: p_name (type: string)
- outputColumnNames: _col0
+ expressions: p_partkey (type: int), p_name (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 5
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (p_partkey is not null and p_name is not null) (type: boolean)
+ predicate: (p_name is not null and p_partkey is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Map 7
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (p_partkey is not null and p_name is not null) (type: boolean)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: p_partkey (type: int), p_name (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col1 (type: int)
- outputColumnNames: _col1, _col2, _col4, _col6
+ 0 _col0 (type: int), _col1 (type: string)
+ 1 _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col1, _col3, _col5, _col6
Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -485,38 +503,22 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 6
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string)
- Reducer 6
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int), _col1 (type: string)
- 1 _col0 (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col3 (type: string)
- sort order: +
- Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ value expressions: _col2 (type: string)
Stage: Stage-0
Fetch Operator
@@ -542,9 +544,9 @@
Stage: Stage-1
Spark
Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2)
- Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -553,35 +555,36 @@
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ predicate: (p_partkey is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: p_name is not null (type: boolean)
+ predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: p_name (type: string)
- outputColumnNames: _col0
+ expressions: p_partkey (type: int), p_name (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 5
Map Operator Tree:
TableScan
@@ -595,39 +598,56 @@
outputColumnNames: _col0, _col1
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Map 7
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (p_name is not null and p_partkey is not null) (type: boolean)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: p_partkey (type: int), p_name (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
key expressions: _col1 (type: string), _col0 (type: int)
sort order: ++
Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
+ Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col1 (type: int)
- outputColumnNames: _col1, _col2, _col4, _col6
+ 0 _col1 (type: string), _col0 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6
Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -637,38 +657,22 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 6
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string)
- Reducer 6
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string), _col0 (type: int)
- 1 _col1 (type: string), _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col3 (type: string)
- sort order: +
- Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ value expressions: _col2 (type: string)
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out
===================================================================
--- ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/subquery_in_explain_rewrite.q.out (working copy)
@@ -185,11 +185,11 @@
(select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1
Where Clause SubQuery Joining Condition:
- on part.p_size = sq_1._wcol0
+ on part.p_size = sq_1.first_value_window_0
Rewritten Query:
select p_mfgr, p_name, p_size
-from part left semi join (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 on part.p_size = sq_1._wcol0
+from part left semi join (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) sq_1 on part.p_size = sq_1.first_value_window_0
where 1 = 1
PREHOOK: query: -- non agg, non corr, with join in Parent Query
explain rewrite
Index: ql/src/test/results/clientpositive/join33.q.out
===================================================================
--- ql/src/test/results/clientpositive/join33.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/join33.q.out (working copy)
@@ -109,25 +109,71 @@
Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:y
+ $hdt$_0:z
Fetch Operator
limit: -1
+ Partition Description:
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
$hdt$_1:$hdt$_2:x
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:y
+ $hdt$_0:z
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
@@ -141,7 +187,7 @@
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value is not null and key is not null) (type: boolean)
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -150,31 +196,31 @@
HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
Position of Big Table: 0
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
@@ -183,11 +229,11 @@
keys:
0 _col0 (type: string)
1 _col3 (type: string)
- outputColumnNames: _col1, _col2, _col5
+ outputColumnNames: _col0, _col4, _col5
Position of Big Table: 1
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -356,7 +402,7 @@
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z]
+ /src [$hdt$_1:$hdt$_1:y]
Stage: Stage-0
Move Operator
@@ -405,8 +451,8 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
Index: ql/src/test/results/clientpositive/ptf.q.out
===================================================================
--- ql/src/test/results/clientpositive/ptf.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/ptf.q.out (working copy)
@@ -97,28 +97,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -313,7 +313,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -321,7 +321,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -585,28 +585,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -766,21 +766,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -788,7 +788,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -982,21 +982,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col2, 1, _col2
name: lag
window function: GenericUDAFLagEvaluator
@@ -1004,7 +1004,7 @@
isPivotResult: true
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1480,7 +1480,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -1488,7 +1488,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1657,28 +1657,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1834,28 +1834,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2081,28 +2081,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2268,20 +2268,20 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col5
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol1
+ alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2492,33 +2492,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -2526,7 +2526,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2853,14 +2853,14 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3075,28 +3075,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3150,15 +3150,15 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(5)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint)
- outputColumnNames: _col1, _col2, _col5, _wcol0
+ expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col5, sum_window_0
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -3176,7 +3176,7 @@
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _wcol0 (type: bigint), _col5 (type: int)
+ value expressions: sum_window_0 (type: bigint), _col5 (type: int)
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
@@ -3196,35 +3196,35 @@
raw input shape:
window functions:
window function definition
- alias: _wcol1
+ alias: rank_window_1
arguments: _col3, _col2
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: dense_rank_window_2
arguments: _col3, _col2
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol3
+ alias: cume_dist_window_3
arguments: _col3, _col2
name: cume_dist
window function: GenericUDAFCumeDistEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol4
+ alias: first_value_window_4
arguments: _col6, true
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int)
+ expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3551,28 +3551,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3841,28 +3841,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4097,28 +4097,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4400,28 +4400,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4675,28 +4675,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4937,28 +4937,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/subquery_in.q.out
===================================================================
--- ql/src/test/results/clientpositive/subquery_in.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/subquery_in.q.out (working copy)
@@ -278,7 +278,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -286,7 +286,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col5 (type: int)
@@ -458,7 +458,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -466,7 +466,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean)
+ predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
Index: ql/src/test/results/clientpositive/subquery_in_having.q.out
===================================================================
--- ql/src/test/results/clientpositive/subquery_in_having.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/subquery_in_having.q.out (working copy)
@@ -1357,17 +1357,17 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: first_value_window_0
arguments: _col1
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(MAX)~
Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _wcol0 is not null (type: boolean)
+ predicate: first_value_window_0 is not null (type: boolean)
Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _wcol0 (type: string)
+ expressions: first_value_window_0 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Index: ql/src/test/results/clientpositive/tez/cbo_join.q.out
===================================================================
--- ql/src/test/results/clientpositive/tez/cbo_join.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/tez/cbo_join.q.out (working copy)
@@ -1,4 +1,5 @@
-PREHOOK: query: -- 4. Test Select + Join + TS
+PREHOOK: query: -- SORT_QUERY_RESULTS
+-- 4. Test Select + Join + TS
select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -6,7 +7,8 @@
PREHOOK: Input: default@cbo_t2
PREHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-POSTHOOK: query: -- 4. Test Select + Join + TS
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+-- 4. Test Select + Join + TS
select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@cbo_t1
@@ -122,46 +124,6 @@
POSTHOOK: Input: default@cbo_t1@dt=2014
POSTHOOK: Input: default@cbo_t3
#### A masked pattern was here ####
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
1
1
1
@@ -522,6 +484,46 @@
1
1
1
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -632,8 +634,6 @@
POSTHOOK: Input: default@cbo_t2
POSTHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-NULL NULL
-NULL NULL
1 1
1 1
1 1
@@ -730,6 +730,8 @@
1 1
1 1
1 1
+NULL NULL
+NULL NULL
PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -744,8 +746,6 @@
POSTHOOK: Input: default@cbo_t2
POSTHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-NULL NULL
-NULL NULL
1 1
1 1
1 1
@@ -847,6 +847,8 @@
NULL 2
NULL 2
NULL 2
+NULL NULL
+NULL NULL
PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -861,10 +863,6 @@
POSTHOOK: Input: default@cbo_t2
POSTHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-NULL NULL
-NULL NULL
-NULL NULL
-NULL NULL
1 1
1 1
1 1
@@ -966,6 +964,10 @@
NULL 2
NULL 2
NULL 2
+NULL NULL
+NULL NULL
+NULL NULL
+NULL NULL
PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -5334,8 +5336,6 @@
POSTHOOK: Input: default@cbo_t2@dt=2014
POSTHOOK: Input: default@cbo_t3
#### A masked pattern was here ####
-NULL NULL NULL NULL
-NULL NULL NULL NULL
1 1 1 1
1 1 1 1
1 1 1 1
@@ -5870,6 +5870,8 @@
NULL NULL NULL NULL
NULL NULL NULL NULL
NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL
PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -6430,8 +6432,6 @@
POSTHOOK: Input: default@cbo_t2@dt=2014
POSTHOOK: Input: default@cbo_t3
#### A masked pattern was here ####
-NULL NULL NULL NULL
-NULL NULL NULL NULL
1 1 1 1
1 1 1 1
1 1 1 1
@@ -6966,6 +6966,8 @@
NULL NULL NULL NULL
NULL NULL NULL NULL
NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL
PREHOOK: query: -- 5. Test Select + Join + FIL + TS
select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0)
PREHOOK: type: QUERY
Index: ql/src/test/results/clientpositive/tez/ptf.q.out
===================================================================
--- ql/src/test/results/clientpositive/tez/ptf.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/tez/ptf.q.out (working copy)
@@ -93,28 +93,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -297,7 +297,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -305,7 +305,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -571,28 +571,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -748,21 +748,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -770,7 +770,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -950,21 +950,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col2, 1, _col2
name: lag
window function: GenericUDAFLagEvaluator
@@ -972,7 +972,7 @@
isPivotResult: true
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1440,7 +1440,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -1448,7 +1448,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1613,28 +1613,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1786,28 +1786,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2019,28 +2019,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2202,20 +2202,20 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col5
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol1
+ alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2414,33 +2414,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -2448,7 +2448,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2767,14 +2767,14 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2986,28 +2986,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3038,22 +3038,22 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(5)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint)
- outputColumnNames: _col1, _col2, _col5, _wcol0
+ expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col5, sum_window_0
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _wcol0 (type: bigint), _col5 (type: int)
+ value expressions: sum_window_0 (type: bigint), _col5 (type: int)
Reducer 5
Reduce Operator Tree:
Select Operator
@@ -3074,35 +3074,35 @@
raw input shape:
window functions:
window function definition
- alias: _wcol1
+ alias: rank_window_1
arguments: _col3, _col2
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: dense_rank_window_2
arguments: _col3, _col2
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol3
+ alias: cume_dist_window_3
arguments: _col3, _col2
name: cume_dist
window function: GenericUDAFCumeDistEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol4
+ alias: first_value_window_4
arguments: _col6, true
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int)
+ expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3431,28 +3431,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3697,28 +3697,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3939,28 +3939,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4218,28 +4218,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4479,28 +4479,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4727,28 +4727,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/tez/explainuser_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/tez/explainuser_1.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out (working copy)
@@ -3665,7 +3665,7 @@
outputColumnNames:["_col0"]
Statistics:Num rows: 13 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator [FIL_26]
- predicate:_wcol0 is not null (type: boolean)
+ predicate:first_value_window_0 is not null (type: boolean)
Statistics:Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator [PTF_11]
Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}]
@@ -7596,9 +7596,9 @@
Map-reduce partition columns:_col2 (type: string)
sort order:++
Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- value expressions:_wcol0 (type: bigint), _col5 (type: int)
+ value expressions:sum_window_0 (type: bigint), _col5 (type: int)
Select Operator [SEL_13]
- outputColumnNames:["_col1","_col2","_col5","_wcol0"]
+ outputColumnNames:["_col1","_col2","_col5","sum_window_0"]
Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
PTF Operator [PTF_12]
Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}]
Index: ql/src/test/results/clientpositive/tez/subquery_in.q.out
===================================================================
--- ql/src/test/results/clientpositive/tez/subquery_in.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/tez/subquery_in.q.out (working copy)
@@ -335,7 +335,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -343,7 +343,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col5 (type: int)
@@ -507,7 +507,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -515,7 +515,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean)
+ predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
Index: ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
===================================================================
--- ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out (working copy)
@@ -346,28 +346,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -768,7 +768,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -776,7 +776,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1297,28 +1297,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1630,21 +1630,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -1652,7 +1652,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1999,21 +1999,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col2, 1, _col2
name: lag
window function: GenericUDAFLagEvaluator
@@ -2021,7 +2021,7 @@
isPivotResult: true
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2957,7 +2957,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -2965,7 +2965,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3280,28 +3280,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3604,28 +3604,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3998,28 +3998,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4352,20 +4352,20 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col5
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol1
+ alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4866,33 +4866,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -4900,7 +4900,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -5454,14 +5454,14 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -5939,28 +5939,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -6010,15 +6010,15 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(5)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint)
- outputColumnNames: _col1, _col2, _col5, _wcol0
+ expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col5, sum_window_0
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
@@ -6026,7 +6026,7 @@
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _wcol0 (type: bigint), _col5 (type: int)
+ value expressions: sum_window_0 (type: bigint), _col5 (type: int)
auto parallelism: true
Reducer 5
Needs Tagging: false
@@ -6049,35 +6049,35 @@
raw input shape:
window functions:
window function definition
- alias: _wcol1
+ alias: rank_window_1
arguments: _col3, _col2
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: dense_rank_window_2
arguments: _col3, _col2
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol3
+ alias: cume_dist_window_3
arguments: _col3, _col2
name: cume_dist
window function: GenericUDAFCumeDistEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol4
+ alias: first_value_window_4
arguments: _col6, true
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int)
+ expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -6616,28 +6616,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -7066,28 +7066,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -7477,28 +7477,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -7939,28 +7939,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -8410,28 +8410,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -8840,28 +8840,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/tez/ptf_streaming.q.out
===================================================================
--- ql/src/test/results/clientpositive/tez/ptf_streaming.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/tez/ptf_streaming.q.out (working copy)
@@ -93,28 +93,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -297,7 +297,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -305,7 +305,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -615,7 +615,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -623,7 +623,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -788,28 +788,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1021,28 +1021,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1256,28 +1256,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1491,28 +1491,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1705,33 +1705,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -1739,7 +1739,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1980,28 +1980,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2246,28 +2246,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2496,28 +2496,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/tez/explainuser_2.q.out
===================================================================
--- ql/src/test/results/clientpositive/tez/explainuser_2.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out (working copy)
@@ -53,11 +53,11 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@ss
POSTHOOK: Lineage: ss.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: ss.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: ss.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: ss.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: ss.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: ss.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: INSERT OVERWRITE TABLE sr
SELECT x.key,x.value,y.key,y.value,z.key,z.value
FROM src1 x
@@ -81,11 +81,11 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
POSTHOOK: Output: default@sr
POSTHOOK: Lineage: sr.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: sr.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: sr.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: sr.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: sr.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: sr.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: sr.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: sr.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: sr.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: sr.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: INSERT OVERWRITE TABLE cs
SELECT x.key,x.value,y.key,y.value,z.key,z.value
FROM src1 x
@@ -195,7 +195,7 @@
Merge Join Operator [MERGEJOIN_29]
| condition map:[{"":"Inner Join 0 to 1"}]
| keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"}
- | outputColumnNames:["_col1","_col2","_col5"]
+ | outputColumnNames:["_col0","_col4","_col5"]
| Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
|<-Map 1 [SIMPLE_EDGE]
| Reduce Output Operator [RS_14]
@@ -203,15 +203,14 @@
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Select Operator [SEL_1]
- | outputColumnNames:["_col0","_col1"]
+ | Select Operator [SEL_2]
+ | outputColumnNames:["_col0"]
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| Filter Operator [FIL_25]
- | predicate:key is not null (type: boolean)
+ | predicate:value is not null (type: boolean)
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| TableScan [TS_0]
- | alias:y
+ | alias:z
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
|<-Reducer 4 [SIMPLE_EDGE]
Reduce Output Operator [RS_16]
@@ -219,11 +218,11 @@
Map-reduce partition columns:_col3 (type: string)
sort order:+
Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col0 (type: string)
+ value expressions:_col1 (type: string), _col2 (type: string)
Merge Join Operator [MERGEJOIN_28]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
- | outputColumnNames:["_col0","_col3"]
+ | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
+ | outputColumnNames:["_col1","_col2","_col3"]
| Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
|<-Map 3 [SIMPLE_EDGE]
| Reduce Output Operator [RS_8]
@@ -231,27 +230,28 @@
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col1 (type: string)
| Select Operator [SEL_4]
- | outputColumnNames:["_col0"]
+ | outputColumnNames:["_col0","_col1"]
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| Filter Operator [FIL_26]
- | predicate:value is not null (type: boolean)
+ | predicate:key is not null (type: boolean)
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_2]
- | alias:z
+ | TableScan [TS_3]
+ | alias:y
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
|<-Map 5 [SIMPLE_EDGE]
Reduce Output Operator [RS_10]
- key expressions:_col1 (type: string)
- Map-reduce partition columns:_col1 (type: string)
+ key expressions:_col0 (type: string)
+ Map-reduce partition columns:_col0 (type: string)
sort order:+
Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col0 (type: string)
+ value expressions:_col1 (type: string)
Select Operator [SEL_6]
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Filter Operator [FIL_27]
- predicate:(value is not null and key is not null) (type: boolean)
+ predicate:(key is not null and value is not null) (type: boolean)
Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
TableScan [TS_5]
alias:x
@@ -315,21 +315,21 @@
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
-Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 9 <- Reducer 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
-Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
- Reducer 5
+ Reducer 6
File Output Operator [FS_71]
compressed:false
Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
@@ -339,52 +339,164 @@
Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
Select Operator [SEL_69]
| outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 4 [SIMPLE_EDGE]
+ | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 5 [SIMPLE_EDGE]
Reduce Output Operator [RS_68]
key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order:+++
- Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE
value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint)
Group By Operator [GBY_66]
| aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"]
| keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
| outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 3 [SIMPLE_EDGE]
+ | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 4 [SIMPLE_EDGE]
Reduce Output Operator [RS_65]
key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order:+++
- Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint)
Group By Operator [GBY_64]
aggregations:["count(_col3)","count(_col4)","count(_col5)"]
keys:_col0 (type: string), _col1 (type: string), _col2 (type: string)
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
Select Operator [SEL_62]
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator [MERGEJOIN_113]
+ Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_111]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col15 (type: string), _col17 (type: string)","0":"_col1 (type: string), _col3 (type: string)"}
- | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"]
- | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
+ | keys:{"1":"_col8 (type: string), _col10 (type: string)","0":"_col8 (type: string), _col10 (type: string)"}
+ | outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"]
+ | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 12 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_60]
+ | key expressions:_col8 (type: string), _col10 (type: string)
+ | Map-reduce partition columns:_col8 (type: string), _col10 (type: string)
+ | sort order:++
+ | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col6 (type: string), _col7 (type: string)
+ | Select Operator [SEL_46]
+ | outputColumnNames:["_col10","_col6","_col7","_col8"]
+ | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE
+ | Merge Join Operator [MERGEJOIN_109]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"}
+ | | outputColumnNames:["_col6","_col7","_col8","_col10"]
+ | | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 11 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_42]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+ | | Select Operator [SEL_19]
+ | | outputColumnNames:["_col1"]
+ | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_101]
+ | | predicate:((key = 'src1key') and value is not null) (type: boolean)
+ | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_17]
+ | | alias:src1
+ | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | |<-Reducer 14 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_44]
+ | key expressions:_col5 (type: string)
+ | Map-reduce partition columns:_col5 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col4 (type: string), _col6 (type: string), _col8 (type: string)
+ | Merge Join Operator [MERGEJOIN_108]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col4","_col5","_col6","_col8"]
+ | | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 13 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_36]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | Select Operator [SEL_22]
+ | | outputColumnNames:["_col0"]
+ | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_102]
+ | | predicate:((value = 'd1value') and key is not null) (type: boolean)
+ | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_20]
+ | | alias:d1
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | |<-Reducer 16 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_38]
+ | key expressions:_col2 (type: string)
+ | Map-reduce partition columns:_col2 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string)
+ | Merge Join Operator [MERGEJOIN_107]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"}
+ | | outputColumnNames:["_col2","_col3","_col4","_col6"]
+ | | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 15 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_30]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | | Select Operator [SEL_25]
+ | | outputColumnNames:["_col1"]
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_103]
+ | | predicate:((key = 'srcpartkey') and value is not null) (type: boolean)
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_23]
+ | | alias:srcpart
+ | | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 17 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_32]
+ | key expressions:_col3 (type: string)
+ | Map-reduce partition columns:_col3 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string)
+ | Select Operator [SEL_28]
+ | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"]
+ | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_104]
+ | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean)
+ | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_26]
+ | alias:ss
+ | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 3 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_58]
+ key expressions:_col8 (type: string), _col10 (type: string)
+ Map-reduce partition columns:_col8 (type: string), _col10 (type: string)
+ sort order:++
+ Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string)
+ Merge Join Operator [MERGEJOIN_110]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"1":"_col3 (type: string), _col5 (type: string)","0":"_col1 (type: string), _col3 (type: string)"}
+ | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"]
+ | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
|<-Reducer 2 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_58]
+ | Reduce Output Operator [RS_53]
| key expressions:_col1 (type: string), _col3 (type: string)
| Map-reduce partition columns:_col1 (type: string), _col3 (type: string)
| sort order:++
| Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
| value expressions:_col2 (type: string)
- | Merge Join Operator [MERGEJOIN_107]
+ | Merge Join Operator [MERGEJOIN_105]
| | condition map:[{"":"Inner Join 0 to 1"}]
| | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
| | outputColumnNames:["_col1","_col2","_col3"]
| | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
| |<-Map 1 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_53]
+ | | Reduce Output Operator [RS_48]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
@@ -393,14 +505,14 @@
| | Select Operator [SEL_1]
| | outputColumnNames:["_col0","_col1","_col2","_col3"]
| | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_99]
+ | | Filter Operator [FIL_97]
| | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean)
| | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE
| | TableScan [TS_0]
| | alias:cs
| | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 6 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_55]
+ | |<-Map 7 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_50]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
@@ -408,111 +520,26 @@
| Select Operator [SEL_4]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_100]
+ | Filter Operator [FIL_98]
| predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean)
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
| TableScan [TS_2]
| alias:d1
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
|<-Reducer 9 [SIMPLE_EDGE]
- Reduce Output Operator [RS_60]
- key expressions:_col15 (type: string), _col17 (type: string)
- Map-reduce partition columns:_col15 (type: string), _col17 (type: string)
+ Reduce Output Operator [RS_55]
+ key expressions:_col3 (type: string), _col5 (type: string)
+ Map-reduce partition columns:_col3 (type: string), _col5 (type: string)
sort order:++
- Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col6 (type: string), _col7 (type: string), _col14 (type: string)
- Select Operator [SEL_51]
- outputColumnNames:["_col14","_col15","_col17","_col6","_col7"]
- Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator [MERGEJOIN_112]
+ Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col2 (type: string), _col4 (type: string)
+ Merge Join Operator [MERGEJOIN_106]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col2 (type: string), _col4 (type: string)","0":"_col8 (type: string), _col10 (type: string)"}
- | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"]
- | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 16 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_49]
- | key expressions:_col2 (type: string), _col4 (type: string)
- | Map-reduce partition columns:_col2 (type: string), _col4 (type: string)
- | sort order:++
+ | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
+ | outputColumnNames:["_col2","_col3","_col4","_col5"]
| Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col3 (type: string), _col5 (type: string)
- | Merge Join Operator [MERGEJOIN_111]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
- | | outputColumnNames:["_col2","_col3","_col4","_col5"]
- | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 15 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_36]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
- | | Select Operator [SEL_31]
- | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"]
- | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_105]
- | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean)
- | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_29]
- | | alias:sr
- | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 17 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_38]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
- | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_34]
- | outputColumnNames:["_col0"]
- | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_106]
- | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean)
- | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_32]
- | alias:d1
- | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 8 [SIMPLE_EDGE]
- Reduce Output Operator [RS_47]
- key expressions:_col8 (type: string), _col10 (type: string)
- Map-reduce partition columns:_col8 (type: string), _col10 (type: string)
- sort order:++
- Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col6 (type: string), _col7 (type: string)
- Merge Join Operator [MERGEJOIN_110]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"}
- | outputColumnNames:["_col6","_col7","_col8","_col10"]
- | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE
- |<-Map 7 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_42]
- | key expressions:_col1 (type: string)
- | Map-reduce partition columns:_col1 (type: string)
- | sort order:+
- | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_7]
- | outputColumnNames:["_col1"]
- | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_101]
- | predicate:((key = 'src1key') and value is not null) (type: boolean)
- | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_5]
- | alias:src1
- | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 11 [SIMPLE_EDGE]
- Reduce Output Operator [RS_44]
- key expressions:_col5 (type: string)
- Map-reduce partition columns:_col5 (type: string)
- sort order:+
- Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col4 (type: string), _col6 (type: string), _col8 (type: string)
- Merge Join Operator [MERGEJOIN_109]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"}
- | outputColumnNames:["_col4","_col5","_col6","_col8"]
- | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
|<-Map 10 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_24]
+ | Reduce Output Operator [RS_14]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
@@ -520,54 +547,27 @@
| Select Operator [SEL_10]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_102]
- | predicate:((value = 'd1value') and key is not null) (type: boolean)
+ | Filter Operator [FIL_100]
+ | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean)
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
| TableScan [TS_8]
| alias:d1
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 13 [SIMPLE_EDGE]
- Reduce Output Operator [RS_26]
- key expressions:_col2 (type: string)
- Map-reduce partition columns:_col2 (type: string)
+ |<-Map 8 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_12]
+ key expressions:_col0 (type: string)
+ Map-reduce partition columns:_col0 (type: string)
sort order:+
- Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string)
- Merge Join Operator [MERGEJOIN_108]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"}
- | outputColumnNames:["_col2","_col3","_col4","_col6"]
- | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- |<-Map 12 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_18]
- | key expressions:_col1 (type: string)
- | Map-reduce partition columns:_col1 (type: string)
- | sort order:+
- | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_13]
- | outputColumnNames:["_col1"]
- | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_103]
- | predicate:((key = 'srcpartkey') and value is not null) (type: boolean)
- | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_11]
- | alias:srcpart
- | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- |<-Map 14 [SIMPLE_EDGE]
- Reduce Output Operator [RS_20]
- key expressions:_col3 (type: string)
- Map-reduce partition columns:_col3 (type: string)
- sort order:+
Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string)
- Select Operator [SEL_16]
- outputColumnNames:["_col0","_col1","_col2","_col3","_col4"]
+ value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ Select Operator [SEL_7]
+ outputColumnNames:["_col0","_col2","_col3","_col4","_col5"]
Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- Filter Operator [FIL_104]
- predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean)
+ Filter Operator [FIL_99]
+ predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean)
Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- TableScan [TS_14]
- alias:ss
+ TableScan [TS_5]
+ alias:sr
Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE
PREHOOK: query: explain
SELECT x.key, z.value, y.value
@@ -590,33 +590,33 @@
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 13 <- Union 12 (SIMPLE_EDGE)
-Reducer 3 <- Union 2 (SIMPLE_EDGE)
-Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Map 11 <- Union 12 (CONTAINS)
-Map 1 <- Union 2 (CONTAINS)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Map 7 <- Union 2 (CONTAINS)
-Reducer 6 <- Union 5 (SIMPLE_EDGE)
-Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE)
-Map 15 <- Union 12 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Map 13 <- Union 14 (CONTAINS)
+Map 5 <- Union 6 (CONTAINS)
+Reducer 4 <- Union 3 (SIMPLE_EDGE)
+Reducer 7 <- Union 6 (SIMPLE_EDGE)
+Map 9 <- Union 6 (CONTAINS)
+Reducer 8 <- Map 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 15 <- Union 14 (SIMPLE_EDGE)
+Reducer 16 <- Map 18 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Map 17 <- Union 14 (CONTAINS)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 6
+ Reducer 4
File Output Operator [FS_61]
compressed:false
- Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Group By Operator [GBY_59]
| keys:KEY._col0 (type: string), KEY._col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
- |<-Union 5 [SIMPLE_EDGE]
- |<-Reducer 14 [CONTAINS]
+ | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 3 [SIMPLE_EDGE]
+ |<-Reducer 2 [CONTAINS]
| Reduce Output Operator [RS_58]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -624,196 +624,196 @@
| Group By Operator [GBY_57]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Select Operator [SEL_53]
+ | Select Operator [SEL_26]
| outputColumnNames:["_col0","_col1"]
| Merge Join Operator [MERGEJOIN_85]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"}
- | | outputColumnNames:["_col0","_col2"]
- | |<-Reducer 13 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_49]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | | Select Operator [SEL_37]
- | | outputColumnNames:["_col0"]
- | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | | Group By Operator [GBY_36]
- | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
- | | | outputColumnNames:["_col0","_col1"]
- | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | | |<-Union 12 [SIMPLE_EDGE]
- | | |<-Map 11 [CONTAINS]
- | | | Reduce Output Operator [RS_35]
- | | | key expressions:_col0 (type: string), _col1 (type: string)
- | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | | sort order:++
- | | | Group By Operator [GBY_34]
- | | | keys:_col0 (type: string), _col1 (type: string)
- | | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_28]
- | | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_78]
- | | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_27]
- | | | alias:x
- | | |<-Map 15 [CONTAINS]
- | | Reduce Output Operator [RS_35]
- | | key expressions:_col0 (type: string), _col1 (type: string)
- | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | sort order:++
- | | Group By Operator [GBY_34]
- | | keys:_col0 (type: string), _col1 (type: string)
- | | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_30]
- | | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_79]
- | | predicate:value is not null (type: boolean)
- | | TableScan [TS_29]
- | | alias:y
- | |<-Reducer 17 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_51]
- | key expressions:_col2 (type: string)
- | Map-reduce partition columns:_col2 (type: string)
- | sort order:+
- | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Merge Join Operator [MERGEJOIN_83]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
| | outputColumnNames:["_col1","_col2"]
- | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 16 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_43]
+ | |<-Map 1 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_22]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
| | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | Select Operator [SEL_39]
+ | | Select Operator [SEL_1]
| | outputColumnNames:["_col0"]
| | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_80]
+ | | Filter Operator [FIL_76]
| | predicate:key is not null (type: boolean)
| | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_38]
+ | | TableScan [TS_0]
| | alias:y
| | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 18 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_45]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
+ | |<-Reducer 8 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_24]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
| sort order:+
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Select Operator [SEL_41]
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_81]
- | predicate:(key is not null and value is not null) (type: boolean)
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_40]
- | alias:x
- | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 4 [CONTAINS]
- Reduce Output Operator [RS_58]
- key expressions:_col0 (type: string), _col1 (type: string)
- Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- sort order:++
- Group By Operator [GBY_57]
- keys:_col0 (type: string), _col1 (type: string)
- outputColumnNames:["_col0","_col1"]
- Select Operator [SEL_26]
- outputColumnNames:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_84]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"}
- | outputColumnNames:["_col0","_col2"]
- |<-Reducer 3 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_22]
+ | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col0 (type: string)
+ | Merge Join Operator [MERGEJOIN_84]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 10 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_18]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Select Operator [SEL_14]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_79]
+ | | predicate:(value is not null and key is not null) (type: boolean)
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_13]
+ | | alias:x
+ | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | |<-Reducer 7 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_16]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_10]
+ | Select Operator [SEL_12]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Group By Operator [GBY_9]
+ | Group By Operator [GBY_11]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | |<-Union 2 [SIMPLE_EDGE]
- | |<-Map 1 [CONTAINS]
- | | Reduce Output Operator [RS_8]
+ | |<-Union 6 [SIMPLE_EDGE]
+ | |<-Map 5 [CONTAINS]
+ | | Reduce Output Operator [RS_10]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_7]
+ | | Group By Operator [GBY_9]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_1]
+ | | Select Operator [SEL_3]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_74]
+ | | Filter Operator [FIL_77]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_0]
+ | | TableScan [TS_2]
| | alias:x
- | |<-Map 7 [CONTAINS]
- | Reduce Output Operator [RS_8]
+ | |<-Map 9 [CONTAINS]
+ | Reduce Output Operator [RS_10]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Group By Operator [GBY_7]
+ | Group By Operator [GBY_9]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Select Operator [SEL_3]
+ | Select Operator [SEL_5]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_75]
+ | Filter Operator [FIL_78]
| predicate:value is not null (type: boolean)
- | TableScan [TS_2]
+ | TableScan [TS_4]
| alias:y
- |<-Reducer 9 [SIMPLE_EDGE]
- Reduce Output Operator [RS_24]
- key expressions:_col2 (type: string)
- Map-reduce partition columns:_col2 (type: string)
- sort order:+
- Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col1 (type: string)
- Merge Join Operator [MERGEJOIN_82]
+ |<-Reducer 12 [CONTAINS]
+ Reduce Output Operator [RS_58]
+ key expressions:_col0 (type: string), _col1 (type: string)
+ Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ sort order:++
+ Group By Operator [GBY_57]
+ keys:_col0 (type: string), _col1 (type: string)
+ outputColumnNames:["_col0","_col1"]
+ Select Operator [SEL_53]
+ outputColumnNames:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_87]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
+ | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
| outputColumnNames:["_col1","_col2"]
- | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- |<-Map 10 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_18]
+ |<-Map 11 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_49]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | Select Operator [SEL_28]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_80]
+ | predicate:key is not null (type: boolean)
+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_27]
+ | alias:y
+ | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 16 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_51]
+ key expressions:_col1 (type: string)
+ Map-reduce partition columns:_col1 (type: string)
+ sort order:+
+ Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ value expressions:_col0 (type: string)
+ Merge Join Operator [MERGEJOIN_86]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 18 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_45]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
| Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Select Operator [SEL_14]
+ | value expressions:_col0 (type: string)
+ | Select Operator [SEL_41]
| outputColumnNames:["_col0","_col1"]
| Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_77]
- | predicate:(key is not null and value is not null) (type: boolean)
+ | Filter Operator [FIL_83]
+ | predicate:(value is not null and key is not null) (type: boolean)
| Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_13]
+ | TableScan [TS_40]
| alias:x
| Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- |<-Map 8 [SIMPLE_EDGE]
- Reduce Output Operator [RS_16]
+ |<-Reducer 15 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_43]
key expressions:_col0 (type: string)
Map-reduce partition columns:_col0 (type: string)
sort order:+
- Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator [SEL_12]
+ Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [SEL_39]
outputColumnNames:["_col0"]
- Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator [FIL_76]
- predicate:key is not null (type: boolean)
- Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- TableScan [TS_11]
+ Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator [GBY_38]
+ | keys:KEY._col0 (type: string), KEY._col1 (type: string)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 14 [SIMPLE_EDGE]
+ |<-Map 13 [CONTAINS]
+ | Reduce Output Operator [RS_37]
+ | key expressions:_col0 (type: string), _col1 (type: string)
+ | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ | sort order:++
+ | Group By Operator [GBY_36]
+ | keys:_col0 (type: string), _col1 (type: string)
+ | outputColumnNames:["_col0","_col1"]
+ | Select Operator [SEL_30]
+ | outputColumnNames:["_col0","_col1"]
+ | Filter Operator [FIL_81]
+ | predicate:value is not null (type: boolean)
+ | TableScan [TS_29]
+ | alias:x
+ |<-Map 17 [CONTAINS]
+ Reduce Output Operator [RS_37]
+ key expressions:_col0 (type: string), _col1 (type: string)
+ Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ sort order:++
+ Group By Operator [GBY_36]
+ keys:_col0 (type: string), _col1 (type: string)
+ outputColumnNames:["_col0","_col1"]
+ Select Operator [SEL_32]
+ outputColumnNames:["_col0","_col1"]
+ Filter Operator [FIL_82]
+ predicate:value is not null (type: boolean)
+ TableScan [TS_31]
alias:y
- Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
PREHOOK: query: explain
SELECT x.key, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
@@ -843,45 +843,45 @@
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE), Union 7 (CONTAINS)
-Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE)
-Map 24 <- Union 25 (CONTAINS)
-Map 32 <- Union 25 (CONTAINS)
-Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE)
-Reducer 30 <- Union 29 (SIMPLE_EDGE)
-Map 13 <- Union 14 (CONTAINS)
-Map 34 <- Union 29 (CONTAINS)
-Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE)
-Map 1 <- Union 2 (CONTAINS)
-Map 20 <- Union 16 (CONTAINS)
-Map 33 <- Union 27 (CONTAINS)
-Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Map 19 <- Union 14 (CONTAINS)
-Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS)
-Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS)
-Reducer 17 <- Union 16 (SIMPLE_EDGE)
-Reducer 8 <- Union 7 (SIMPLE_EDGE)
-Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 32 <- Union 31 (SIMPLE_EDGE)
+Map 11 <- Union 8 (CONTAINS)
+Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS)
+Reducer 25 <- Map 24 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Map 22 <- Union 18 (CONTAINS)
+Map 21 <- Union 16 (CONTAINS)
+Map 34 <- Union 27 (CONTAINS)
+Reducer 10 <- Map 12 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 33 <- Map 37 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE)
+Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
+Map 36 <- Union 31 (CONTAINS)
+Map 35 <- Union 29 (CONTAINS)
+Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 19 <- Union 18 (SIMPLE_EDGE)
+Reducer 6 <- Union 5 (SIMPLE_EDGE)
+Reducer 9 <- Union 8 (SIMPLE_EDGE)
+Reducer 17 <- Union 16 (SIMPLE_EDGE), Union 18 (CONTAINS)
+Map 15 <- Union 16 (CONTAINS)
Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS)
-Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS)
-Reducer 3 <- Union 2 (SIMPLE_EDGE)
-Map 9 <- Union 2 (CONTAINS)
+Map 26 <- Union 27 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Map 7 <- Union 8 (CONTAINS)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 8
+ Reducer 6
File Output Operator [FS_122]
compressed:false
- Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Group By Operator [GBY_120]
| keys:KEY._col0 (type: string), KEY._col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
- |<-Union 7 [SIMPLE_EDGE]
- |<-Reducer 31 [CONTAINS]
+ | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 5 [SIMPLE_EDGE]
+ |<-Reducer 25 [CONTAINS]
| Reduce Output Operator [RS_119]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -891,149 +891,148 @@
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_114]
| outputColumnNames:["_col0","_col1"]
- | Merge Join Operator [MERGEJOIN_164]
+ | Merge Join Operator [MERGEJOIN_170]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"}
- | | outputColumnNames:["_col2","_col3"]
- | |<-Reducer 30 [SIMPLE_EDGE]
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1","_col3"]
+ | |<-Map 24 [SIMPLE_EDGE]
| | Reduce Output Operator [RS_110]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col1 (type: string)
+ | | Select Operator [SEL_71]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_159]
+ | | predicate:key is not null (type: boolean)
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_70]
+ | | alias:y
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | |<-Reducer 33 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_112]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE
+ | Merge Join Operator [MERGEJOIN_169]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 37 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_106]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Select Operator [SEL_102]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_164]
+ | | predicate:(value is not null and key is not null) (type: boolean)
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_101]
+ | | alias:x
+ | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | |<-Reducer 32 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_104]
+ | key expressions:_col0 (type: string)
+ | Map-reduce partition columns:_col0 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
+ | Select Operator [SEL_100]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
+ | Group By Operator [GBY_99]
+ | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
+ | | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
- | | Select Operator [SEL_98]
- | | outputColumnNames:["_col0"]
- | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
+ | |<-Union 31 [SIMPLE_EDGE]
+ | |<-Reducer 30 [CONTAINS]
+ | | Reduce Output Operator [RS_98]
+ | | key expressions:_col0 (type: string), _col1 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ | | sort order:++
| | Group By Operator [GBY_97]
+ | | keys:_col0 (type: string), _col1 (type: string)
+ | | outputColumnNames:["_col0","_col1"]
+ | | Group By Operator [GBY_90]
| | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
| | |<-Union 29 [SIMPLE_EDGE]
- | | |<-Map 34 [CONTAINS]
- | | | Reduce Output Operator [RS_96]
+ | | |<-Map 35 [CONTAINS]
+ | | | Reduce Output Operator [RS_89]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_95]
+ | | | Group By Operator [GBY_88]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_91]
+ | | | Select Operator [SEL_84]
| | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_156]
+ | | | Filter Operator [FIL_162]
| | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_90]
+ | | | TableScan [TS_83]
| | | alias:y
| | |<-Reducer 28 [CONTAINS]
- | | Reduce Output Operator [RS_96]
+ | | Reduce Output Operator [RS_89]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_95]
+ | | Group By Operator [GBY_88]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Group By Operator [GBY_88]
+ | | Group By Operator [GBY_81]
| | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
| | |<-Union 27 [SIMPLE_EDGE]
- | | |<-Map 33 [CONTAINS]
- | | | Reduce Output Operator [RS_87]
+ | | |<-Map 34 [CONTAINS]
+ | | | Reduce Output Operator [RS_80]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_86]
+ | | | Group By Operator [GBY_79]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_82]
+ | | | Select Operator [SEL_75]
| | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_155]
+ | | | Filter Operator [FIL_161]
| | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_81]
+ | | | TableScan [TS_74]
| | | alias:y
- | | |<-Reducer 26 [CONTAINS]
- | | Reduce Output Operator [RS_87]
+ | | |<-Map 26 [CONTAINS]
+ | | Reduce Output Operator [RS_80]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_86]
- | | keys:_col0 (type: string), _col1 (type: string)
- | | outputColumnNames:["_col0","_col1"]
| | Group By Operator [GBY_79]
- | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
- | | | outputColumnNames:["_col0","_col1"]
- | | |<-Union 25 [SIMPLE_EDGE]
- | | |<-Map 24 [CONTAINS]
- | | | Reduce Output Operator [RS_78]
- | | | key expressions:_col0 (type: string), _col1 (type: string)
- | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | | sort order:++
- | | | Group By Operator [GBY_77]
- | | | keys:_col0 (type: string), _col1 (type: string)
- | | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_71]
- | | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_153]
- | | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_70]
- | | | alias:x
- | | |<-Map 32 [CONTAINS]
- | | Reduce Output Operator [RS_78]
- | | key expressions:_col0 (type: string), _col1 (type: string)
- | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | sort order:++
- | | Group By Operator [GBY_77]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
| | Select Operator [SEL_73]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_154]
+ | | Filter Operator [FIL_160]
| | predicate:value is not null (type: boolean)
| | TableScan [TS_72]
- | | alias:y
- | |<-Reducer 36 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_112]
- | key expressions:_col3 (type: string)
- | Map-reduce partition columns:_col3 (type: string)
- | sort order:+
- | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string), _col2 (type: string)
- | Merge Join Operator [MERGEJOIN_161]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
- | | outputColumnNames:["_col1","_col2","_col3"]
- | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 35 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_104]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string)
- | | Select Operator [SEL_100]
- | | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_157]
- | | predicate:key is not null (type: boolean)
- | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_99]
- | | alias:y
- | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 37 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_106]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Select Operator [SEL_102]
+ | | alias:x
+ | |<-Map 36 [CONTAINS]
+ | Reduce Output Operator [RS_98]
+ | key expressions:_col0 (type: string), _col1 (type: string)
+ | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ | sort order:++
+ | Group By Operator [GBY_97]
+ | keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_158]
- | predicate:(key is not null and value is not null) (type: boolean)
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_101]
- | alias:x
- | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 6 [CONTAINS]
+ | Select Operator [SEL_93]
+ | outputColumnNames:["_col0","_col1"]
+ | Filter Operator [FIL_163]
+ | predicate:value is not null (type: boolean)
+ | TableScan [TS_92]
+ | alias:y
+ |<-Reducer 4 [CONTAINS]
Reduce Output Operator [RS_119]
key expressions:_col0 (type: string), _col1 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -1044,8 +1043,8 @@
Group By Operator [GBY_68]
| keys:KEY._col0 (type: string), KEY._col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- |<-Union 5 [SIMPLE_EDGE]
- |<-Reducer 4 [CONTAINS]
+ |<-Union 3 [SIMPLE_EDGE]
+ |<-Reducer 14 [CONTAINS]
| Reduce Output Operator [RS_67]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -1053,224 +1052,222 @@
| Group By Operator [GBY_66]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Select Operator [SEL_26]
+ | Select Operator [SEL_62]
| outputColumnNames:["_col0","_col1"]
- | Merge Join Operator [MERGEJOIN_162]
+ | Merge Join Operator [MERGEJOIN_168]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"}
- | | outputColumnNames:["_col2","_col3"]
- | |<-Reducer 11 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_24]
- | | key expressions:_col3 (type: string)
- | | Map-reduce partition columns:_col3 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string), _col2 (type: string)
- | | Merge Join Operator [MERGEJOIN_159]
- | | | condition map:[{"":"Inner Join 0 to 1"}]
- | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
- | | | outputColumnNames:["_col1","_col2","_col3"]
- | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | |<-Map 10 [SIMPLE_EDGE]
- | | | Reduce Output Operator [RS_16]
- | | | key expressions:_col0 (type: string)
- | | | Map-reduce partition columns:_col0 (type: string)
- | | | sort order:+
- | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | value expressions:_col1 (type: string)
- | | | Select Operator [SEL_12]
- | | | outputColumnNames:["_col0","_col1"]
- | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | Filter Operator [FIL_146]
- | | | predicate:key is not null (type: boolean)
- | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | TableScan [TS_11]
- | | | alias:y
- | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | | |<-Map 12 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_18]
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1","_col3"]
+ | |<-Map 13 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_58]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| | value expressions:_col1 (type: string)
- | | Select Operator [SEL_14]
+ | | Select Operator [SEL_28]
| | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_154]
+ | | predicate:key is not null (type: boolean)
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_27]
+ | | alias:y
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | |<-Reducer 20 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_60]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ | Merge Join Operator [MERGEJOIN_167]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 23 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_54]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
| | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_147]
- | | predicate:(key is not null and value is not null) (type: boolean)
+ | | value expressions:_col0 (type: string)
+ | | Select Operator [SEL_50]
+ | | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_13]
+ | | Filter Operator [FIL_158]
+ | | predicate:(value is not null and key is not null) (type: boolean)
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_49]
| | alias:x
| | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | |<-Reducer 3 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_22]
+ | |<-Reducer 19 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_52]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
- | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_10]
- | outputColumnNames:["_col0"]
- | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Group By Operator [GBY_9]
- | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
- | | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | |<-Union 2 [SIMPLE_EDGE]
- | |<-Map 1 [CONTAINS]
- | | Reduce Output Operator [RS_8]
- | | key expressions:_col0 (type: string), _col1 (type: string)
- | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | sort order:++
- | | Group By Operator [GBY_7]
- | | keys:_col0 (type: string), _col1 (type: string)
- | | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_1]
- | | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_144]
- | | predicate:value is not null (type: boolean)
- | | TableScan [TS_0]
- | | alias:x
- | |<-Map 9 [CONTAINS]
- | Reduce Output Operator [RS_8]
- | key expressions:_col0 (type: string), _col1 (type: string)
- | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | sort order:++
- | Group By Operator [GBY_7]
- | keys:_col0 (type: string), _col1 (type: string)
- | outputColumnNames:["_col0","_col1"]
- | Select Operator [SEL_3]
- | outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_145]
- | predicate:value is not null (type: boolean)
- | TableScan [TS_2]
- | alias:y
- |<-Reducer 18 [CONTAINS]
- Reduce Output Operator [RS_67]
- key expressions:_col0 (type: string), _col1 (type: string)
- Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- sort order:++
- Group By Operator [GBY_66]
- keys:_col0 (type: string), _col1 (type: string)
- outputColumnNames:["_col0","_col1"]
- Select Operator [SEL_62]
- outputColumnNames:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_163]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"}
- | outputColumnNames:["_col2","_col3"]
- |<-Reducer 17 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_58]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
| Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_46]
+ | Select Operator [SEL_48]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | Group By Operator [GBY_45]
+ | Group By Operator [GBY_47]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | |<-Union 16 [SIMPLE_EDGE]
- | |<-Map 20 [CONTAINS]
- | | Reduce Output Operator [RS_44]
+ | |<-Union 18 [SIMPLE_EDGE]
+ | |<-Map 22 [CONTAINS]
+ | | Reduce Output Operator [RS_46]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_43]
+ | | Group By Operator [GBY_45]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_39]
+ | | Select Operator [SEL_41]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_150]
+ | | Filter Operator [FIL_157]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_38]
+ | | TableScan [TS_40]
| | alias:y
- | |<-Reducer 15 [CONTAINS]
- | Reduce Output Operator [RS_44]
+ | |<-Reducer 17 [CONTAINS]
+ | Reduce Output Operator [RS_46]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Group By Operator [GBY_43]
+ | Group By Operator [GBY_45]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Group By Operator [GBY_36]
+ | Group By Operator [GBY_38]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | |<-Union 14 [SIMPLE_EDGE]
- | |<-Map 13 [CONTAINS]
- | | Reduce Output Operator [RS_35]
+ | |<-Union 16 [SIMPLE_EDGE]
+ | |<-Map 21 [CONTAINS]
+ | | Reduce Output Operator [RS_37]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_34]
+ | | Group By Operator [GBY_36]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_28]
+ | | Select Operator [SEL_32]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_148]
+ | | Filter Operator [FIL_156]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_27]
- | | alias:x
- | |<-Map 19 [CONTAINS]
- | Reduce Output Operator [RS_35]
+ | | TableScan [TS_31]
+ | | alias:y
+ | |<-Map 15 [CONTAINS]
+ | Reduce Output Operator [RS_37]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Group By Operator [GBY_34]
+ | Group By Operator [GBY_36]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_30]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_149]
+ | Filter Operator [FIL_155]
| predicate:value is not null (type: boolean)
| TableScan [TS_29]
- | alias:y
- |<-Reducer 22 [SIMPLE_EDGE]
- Reduce Output Operator [RS_60]
- key expressions:_col3 (type: string)
- Map-reduce partition columns:_col3 (type: string)
- sort order:+
- Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col1 (type: string), _col2 (type: string)
- Merge Join Operator [MERGEJOIN_160]
+ | alias:x
+ |<-Reducer 2 [CONTAINS]
+ Reduce Output Operator [RS_67]
+ key expressions:_col0 (type: string), _col1 (type: string)
+ Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ sort order:++
+ Group By Operator [GBY_66]
+ keys:_col0 (type: string), _col1 (type: string)
+ outputColumnNames:["_col0","_col1"]
+ Select Operator [SEL_26]
+ outputColumnNames:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_166]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
- | outputColumnNames:["_col1","_col2","_col3"]
- | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- |<-Map 21 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_52]
+ | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | outputColumnNames:["_col1","_col3"]
+ |<-Map 1 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_22]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| value expressions:_col1 (type: string)
- | Select Operator [SEL_48]
+ | Select Operator [SEL_1]
| outputColumnNames:["_col0","_col1"]
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_151]
+ | Filter Operator [FIL_150]
| predicate:key is not null (type: boolean)
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_47]
+ | TableScan [TS_0]
| alias:y
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- |<-Map 23 [SIMPLE_EDGE]
- Reduce Output Operator [RS_54]
+ |<-Reducer 10 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_24]
+ key expressions:_col1 (type: string)
+ Map-reduce partition columns:_col1 (type: string)
+ sort order:+
+ Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator [MERGEJOIN_165]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | outputColumnNames:["_col1"]
+ | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 12 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_18]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col0 (type: string)
+ | Select Operator [SEL_14]
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_153]
+ | predicate:(value is not null and key is not null) (type: boolean)
+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_13]
+ | alias:x
+ | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 9 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_16]
key expressions:_col0 (type: string)
Map-reduce partition columns:_col0 (type: string)
sort order:+
- Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col1 (type: string)
- Select Operator [SEL_50]
+ Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
+ Select Operator [SEL_12]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator [GBY_11]
+ | keys:KEY._col0 (type: string), KEY._col1 (type: string)
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 8 [SIMPLE_EDGE]
+ |<-Map 11 [CONTAINS]
+ | Reduce Output Operator [RS_10]
+ | key expressions:_col0 (type: string), _col1 (type: string)
+ | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ | sort order:++
+ | Group By Operator [GBY_9]
+ | keys:_col0 (type: string), _col1 (type: string)
+ | outputColumnNames:["_col0","_col1"]
+ | Select Operator [SEL_5]
+ | outputColumnNames:["_col0","_col1"]
+ | Filter Operator [FIL_152]
+ | predicate:value is not null (type: boolean)
+ | TableScan [TS_4]
+ | alias:y
+ |<-Map 7 [CONTAINS]
+ Reduce Output Operator [RS_10]
+ key expressions:_col0 (type: string), _col1 (type: string)
+ Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ sort order:++
+ Group By Operator [GBY_9]
+ keys:_col0 (type: string), _col1 (type: string)
outputColumnNames:["_col0","_col1"]
- Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- Filter Operator [FIL_152]
- predicate:(key is not null and value is not null) (type: boolean)
- Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- TableScan [TS_49]
+ Select Operator [SEL_3]
+ outputColumnNames:["_col0","_col1"]
+ Filter Operator [FIL_151]
+ predicate:value is not null (type: boolean)
+ TableScan [TS_2]
alias:x
- Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
PREHOOK: query: EXPLAIN
SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
@@ -1301,7 +1298,7 @@
Map Join Operator [MAPJOIN_29]
| condition map:[{"":"Inner Join 0 to 1"}]
| keys:{"Map 1":"_col0 (type: string)","Map 2":"_col3 (type: string)"}
- | outputColumnNames:["_col1","_col2","_col5"]
+ | outputColumnNames:["_col0","_col4","_col5"]
| Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
|<-Map 1 [BROADCAST_EDGE]
| Reduce Output Operator [RS_14]
@@ -1309,45 +1306,44 @@
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Select Operator [SEL_1]
- | outputColumnNames:["_col0","_col1"]
+ | Select Operator [SEL_2]
+ | outputColumnNames:["_col0"]
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| Filter Operator [FIL_25]
- | predicate:key is not null (type: boolean)
+ | predicate:value is not null (type: boolean)
| Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| TableScan [TS_0]
- | alias:y
+ | alias:z
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
|<-Map Join Operator [MAPJOIN_28]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col1 (type: string)"}
- | outputColumnNames:["_col0","_col3"]
+ | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col0 (type: string)"}
+ | outputColumnNames:["_col1","_col2","_col3"]
| Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
|<-Map 3 [BROADCAST_EDGE]
| Reduce Output Operator [RS_10]
- | key expressions:_col1 (type: string)
- | Map-reduce partition columns:_col1 (type: string)
+ | key expressions:_col0 (type: string)
+ | Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col0 (type: string)
+ | value expressions:_col1 (type: string)
| Select Operator [SEL_6]
| outputColumnNames:["_col0","_col1"]
| Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
| Filter Operator [FIL_27]
- | predicate:(value is not null and key is not null) (type: boolean)
+ | predicate:(key is not null and value is not null) (type: boolean)
| Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
| TableScan [TS_5]
| alias:x
| Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
|<-Select Operator [SEL_4]
- outputColumnNames:["_col0"]
+ outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Filter Operator [FIL_26]
- predicate:value is not null (type: boolean)
+ predicate:key is not null (type: boolean)
Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- TableScan [TS_2]
- alias:z
+ TableScan [TS_3]
+ alias:y
Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
PREHOOK: query: EXPLAIN
select
@@ -1408,17 +1404,17 @@
Plan optimized by CBO.
Vertex dependency in root stage
-Map 2 <- Map 1 (BROADCAST_EDGE)
-Map 10 <- Map 9 (BROADCAST_EDGE)
-Map 5 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE)
-Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
-Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+Map 4 <- Map 3 (BROADCAST_EDGE)
+Map 7 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
- Reducer 7
+ Reducer 9
File Output Operator [FS_71]
compressed:false
Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
@@ -1428,52 +1424,94 @@
Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
Select Operator [SEL_69]
| outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE
- |<-Reducer 6 [SIMPLE_EDGE]
+ | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 8 [SIMPLE_EDGE]
Reduce Output Operator [RS_68]
key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order:+++
- Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE
value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint)
Group By Operator [GBY_66]
| aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"]
| keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
| outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE
- |<-Map 5 [SIMPLE_EDGE]
+ | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE
+ |<-Map 7 [SIMPLE_EDGE]
Reduce Output Operator [RS_65]
key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order:+++
- Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint)
Group By Operator [GBY_64]
aggregations:["count(_col3)","count(_col4)","count(_col5)"]
keys:_col0 (type: string), _col1 (type: string), _col2 (type: string)
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
Select Operator [SEL_62]
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator [MAPJOIN_113]
+ Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator [MAPJOIN_111]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 5":"_col15 (type: string), _col17 (type: string)"}
- | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"]
- | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE
+ | keys:{"Map 2":"_col8 (type: string), _col10 (type: string)","Map 7":"_col8 (type: string), _col10 (type: string)"}
+ | outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"]
+ | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
|<-Map 2 [BROADCAST_EDGE]
| Reduce Output Operator [RS_58]
- | key expressions:_col1 (type: string), _col3 (type: string)
- | Map-reduce partition columns:_col1 (type: string), _col3 (type: string)
+ | key expressions:_col8 (type: string), _col10 (type: string)
+ | Map-reduce partition columns:_col8 (type: string), _col10 (type: string)
| sort order:++
- | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col2 (type: string)
- | Map Join Operator [MAPJOIN_107]
+ | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string)
+ | Map Join Operator [MAPJOIN_110]
| | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 4":"_col3 (type: string), _col5 (type: string)"}
+ | | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"]
+ | | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 4 [BROADCAST_EDGE]
+ | | Reduce Output Operator [RS_55]
+ | | key expressions:_col3 (type: string), _col5 (type: string)
+ | | Map-reduce partition columns:_col3 (type: string), _col5 (type: string)
+ | | sort order:++
+ | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col2 (type: string), _col4 (type: string)
+ | | Map Join Operator [MAPJOIN_106]
+ | | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | | keys:{"Map 3":"_col0 (type: string)","Map 4":"_col0 (type: string)"}
+ | | | outputColumnNames:["_col2","_col3","_col4","_col5"]
+ | | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Map 3 [BROADCAST_EDGE]
+ | | | Reduce Output Operator [RS_12]
+ | | | key expressions:_col0 (type: string)
+ | | | Map-reduce partition columns:_col0 (type: string)
+ | | | sort order:+
+ | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ | | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+ | | | Select Operator [SEL_7]
+ | | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"]
+ | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ | | | Filter Operator [FIL_99]
+ | | | predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean)
+ | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ | | | TableScan [TS_5]
+ | | | alias:sr
+ | | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Select Operator [SEL_10]
+ | | outputColumnNames:["_col0"]
+ | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_100]
+ | | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean)
+ | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_8]
+ | | alias:d1
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map Join Operator [MAPJOIN_105]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
| | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"}
| | outputColumnNames:["_col1","_col2","_col3"]
| | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
| |<-Map 1 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_53]
+ | | Reduce Output Operator [RS_48]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
@@ -1482,7 +1520,7 @@
| | Select Operator [SEL_1]
| | outputColumnNames:["_col0","_col1","_col2","_col3"]
| | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_99]
+ | | Filter Operator [FIL_97]
| | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean)
| | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE
| | TableScan [TS_0]
@@ -1491,125 +1529,83 @@
| |<-Select Operator [SEL_4]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_100]
+ | Filter Operator [FIL_98]
| predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean)
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
| TableScan [TS_2]
| alias:d1
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- |<-Select Operator [SEL_51]
- outputColumnNames:["_col14","_col15","_col17","_col6","_col7"]
- Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator [MAPJOIN_112]
+ |<-Select Operator [SEL_46]
+ outputColumnNames:["_col10","_col6","_col7","_col8"]
+ Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator [MAPJOIN_109]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 10":"_col2 (type: string), _col4 (type: string)","Map 5":"_col8 (type: string), _col10 (type: string)"}
- | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"]
- | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE
- |<-Map 10 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_49]
- | key expressions:_col2 (type: string), _col4 (type: string)
- | Map-reduce partition columns:_col2 (type: string), _col4 (type: string)
- | sort order:++
- | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col3 (type: string), _col5 (type: string)
- | Map Join Operator [MAPJOIN_111]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 10":"_col0 (type: string)","Map 9":"_col0 (type: string)"}
- | | outputColumnNames:["_col2","_col3","_col4","_col5"]
- | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 9 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_36]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
- | | Select Operator [SEL_31]
- | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"]
- | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_105]
- | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean)
- | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_29]
- | | alias:sr
- | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE
- | |<-Select Operator [SEL_34]
- | outputColumnNames:["_col0"]
- | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_106]
- | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean)
- | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_32]
- | alias:d1
- | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- |<-Map Join Operator [MAPJOIN_110]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 3":"_col1 (type: string)","Map 5":"_col5 (type: string)"}
+ | keys:{"Map 5":"_col1 (type: string)","Map 7":"_col5 (type: string)"}
| outputColumnNames:["_col6","_col7","_col8","_col10"]
| Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE
- |<-Map 3 [BROADCAST_EDGE]
+ |<-Map 5 [BROADCAST_EDGE]
| Reduce Output Operator [RS_42]
| key expressions:_col1 (type: string)
| Map-reduce partition columns:_col1 (type: string)
| sort order:+
| Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_7]
+ | Select Operator [SEL_19]
| outputColumnNames:["_col1"]
| Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
| Filter Operator [FIL_101]
| predicate:((key = 'src1key') and value is not null) (type: boolean)
| Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_5]
+ | TableScan [TS_17]
| alias:src1
| Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- |<-Map Join Operator [MAPJOIN_109]
+ |<-Map Join Operator [MAPJOIN_108]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 5":"_col2 (type: string)","Map 4":"_col0 (type: string)"}
+ | keys:{"Map 7":"_col2 (type: string)","Map 6":"_col0 (type: string)"}
| outputColumnNames:["_col4","_col5","_col6","_col8"]
| Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
- |<-Map 4 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_24]
+ |<-Map 6 [BROADCAST_EDGE]
+ | Reduce Output Operator [RS_36]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_10]
+ | Select Operator [SEL_22]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
| Filter Operator [FIL_102]
| predicate:((value = 'd1value') and key is not null) (type: boolean)
| Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_8]
+ | TableScan [TS_20]
| alias:d1
| Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- |<-Map Join Operator [MAPJOIN_108]
+ |<-Map Join Operator [MAPJOIN_107]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 5":"_col1 (type: string)","Map 8":"_col3 (type: string)"}
+ | keys:{"Map 10":"_col3 (type: string)","Map 7":"_col1 (type: string)"}
| outputColumnNames:["_col2","_col3","_col4","_col6"]
| Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- |<-Map 8 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_20]
+ |<-Map 10 [BROADCAST_EDGE]
+ | Reduce Output Operator [RS_32]
| key expressions:_col3 (type: string)
| Map-reduce partition columns:_col3 (type: string)
| sort order:+
| Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
| value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string)
- | Select Operator [SEL_16]
+ | Select Operator [SEL_28]
| outputColumnNames:["_col0","_col1","_col2","_col3","_col4"]
| Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
| Filter Operator [FIL_104]
| predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean)
| Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_14]
+ | TableScan [TS_26]
| alias:ss
| Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE
- |<-Select Operator [SEL_13]
+ |<-Select Operator [SEL_25]
outputColumnNames:["_col1"]
Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator [FIL_103]
predicate:((key = 'srcpartkey') and value is not null) (type: boolean)
Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- TableScan [TS_11]
+ TableScan [TS_23]
alias:srcpart
Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
PREHOOK: query: explain
@@ -1633,31 +1629,31 @@
Plan optimized by CBO.
Vertex dependency in root stage
-Map 12 <- Union 10 (CONTAINS)
Reducer 3 <- Union 2 (SIMPLE_EDGE)
-Reducer 11 <- Union 10 (SIMPLE_EDGE)
-Map 13 <- Map 14 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Union 6 (CONTAINS)
-Map 1 <- Union 2 (CONTAINS)
-Map 5 <- Map 8 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS)
-Map 4 <- Union 2 (CONTAINS)
-Reducer 7 <- Union 6 (SIMPLE_EDGE)
-Map 9 <- Union 10 (CONTAINS)
+Reducer 12 <- Map 14 (BROADCAST_EDGE), Union 11 (SIMPLE_EDGE)
+Map 13 <- Union 11 (CONTAINS)
+Map 1 <- Reducer 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Map 10 <- Union 11 (CONTAINS)
+Map 4 <- Union 5 (CONTAINS)
+Map 7 <- Union 5 (CONTAINS)
+Reducer 6 <- Map 8 (BROADCAST_EDGE), Union 5 (SIMPLE_EDGE)
+Map 9 <- Reducer 12 (BROADCAST_EDGE), Union 2 (CONTAINS)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 7
+ Reducer 3
File Output Operator [FS_61]
compressed:false
- Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Group By Operator [GBY_59]
| keys:KEY._col0 (type: string), KEY._col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
- |<-Union 6 [SIMPLE_EDGE]
- |<-Map 13 [CONTAINS]
+ | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 2 [SIMPLE_EDGE]
+ |<-Map 1 [CONTAINS]
| Reduce Output Operator [RS_58]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -1665,161 +1661,165 @@
| Group By Operator [GBY_57]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Select Operator [SEL_53]
+ | Select Operator [SEL_26]
| outputColumnNames:["_col0","_col1"]
| Map Join Operator [MAPJOIN_85]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Reducer 11":"_col0 (type: string)","Map 13":"_col2 (type: string)"}
- | | outputColumnNames:["_col0","_col2"]
- | |<-Reducer 11 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_49]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
+ | | keys:{"Map 1":"_col0 (type: string)","Reducer 6":"_col1 (type: string)"}
+ | | outputColumnNames:["_col1","_col2"]
+ | |<-Reducer 6 [BROADCAST_EDGE]
+ | | Reduce Output Operator [RS_24]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
- | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | | Select Operator [SEL_37]
+ | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Map Join Operator [MAPJOIN_84]
+ | | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | | keys:{"Reducer 6":"_col0 (type: string)","Map 8":"_col1 (type: string)"}
+ | | | outputColumnNames:["_col0","_col1"]
+ | | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Map 8 [BROADCAST_EDGE]
+ | | | Reduce Output Operator [RS_18]
+ | | | key expressions:_col1 (type: string)
+ | | | Map-reduce partition columns:_col1 (type: string)
+ | | | sort order:+
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | value expressions:_col0 (type: string)
+ | | | Select Operator [SEL_14]
+ | | | outputColumnNames:["_col0","_col1"]
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | Filter Operator [FIL_79]
+ | | | predicate:(value is not null and key is not null) (type: boolean)
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | TableScan [TS_13]
+ | | | alias:x
+ | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Select Operator [SEL_12]
| | outputColumnNames:["_col0"]
| | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | | Group By Operator [GBY_36]
+ | | Group By Operator [GBY_11]
| | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
| | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | | |<-Union 10 [SIMPLE_EDGE]
- | | |<-Map 12 [CONTAINS]
- | | | Reduce Output Operator [RS_35]
+ | | |<-Union 5 [SIMPLE_EDGE]
+ | | |<-Map 4 [CONTAINS]
+ | | | Reduce Output Operator [RS_10]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_34]
+ | | | Group By Operator [GBY_9]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_30]
+ | | | Select Operator [SEL_3]
| | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_79]
+ | | | Filter Operator [FIL_77]
| | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_29]
- | | | alias:y
- | | |<-Map 9 [CONTAINS]
- | | Reduce Output Operator [RS_35]
+ | | | TableScan [TS_2]
+ | | | alias:x
+ | | |<-Map 7 [CONTAINS]
+ | | Reduce Output Operator [RS_10]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_34]
+ | | Group By Operator [GBY_9]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_28]
+ | | Select Operator [SEL_5]
| | outputColumnNames:["_col0","_col1"]
| | Filter Operator [FIL_78]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_27]
- | | alias:x
- | |<-Map Join Operator [MAPJOIN_83]
+ | | TableScan [TS_4]
+ | | alias:y
+ | |<-Select Operator [SEL_1]
+ | outputColumnNames:["_col0"]
+ | Filter Operator [FIL_76]
+ | predicate:key is not null (type: boolean)
+ | TableScan [TS_0]
+ | alias:y
+ |<-Map 9 [CONTAINS]
+ Reduce Output Operator [RS_58]
+ key expressions:_col0 (type: string), _col1 (type: string)
+ Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ sort order:++
+ Group By Operator [GBY_57]
+ keys:_col0 (type: string), _col1 (type: string)
+ outputColumnNames:["_col0","_col1"]
+ Select Operator [SEL_53]
+ outputColumnNames:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_87]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"Reducer 12":"_col1 (type: string)","Map 9":"_col0 (type: string)"}
+ | outputColumnNames:["_col1","_col2"]
+ |<-Reducer 12 [BROADCAST_EDGE]
+ | Reduce Output Operator [RS_51]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col0 (type: string)
+ | Map Join Operator [MAPJOIN_86]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 14":"_col0 (type: string)","Map 13":"_col0 (type: string)"}
- | | outputColumnNames:["_col1","_col2"]
+ | | keys:{"Map 14":"_col1 (type: string)","Reducer 12":"_col0 (type: string)"}
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
| |<-Map 14 [BROADCAST_EDGE]
| | Reduce Output Operator [RS_45]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
| | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string)
+ | | value expressions:_col0 (type: string)
| | Select Operator [SEL_41]
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_81]
- | | predicate:(key is not null and value is not null) (type: boolean)
+ | | Filter Operator [FIL_83]
+ | | predicate:(value is not null and key is not null) (type: boolean)
| | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
| | TableScan [TS_40]
| | alias:x
| | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
| |<-Select Operator [SEL_39]
| outputColumnNames:["_col0"]
- | Filter Operator [FIL_80]
- | predicate:key is not null (type: boolean)
- | TableScan [TS_38]
- | alias:y
- |<-Map 5 [CONTAINS]
- Reduce Output Operator [RS_58]
- key expressions:_col0 (type: string), _col1 (type: string)
- Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- sort order:++
- Group By Operator [GBY_57]
- keys:_col0 (type: string), _col1 (type: string)
- outputColumnNames:["_col0","_col1"]
- Select Operator [SEL_26]
- outputColumnNames:["_col0","_col1"]
- Map Join Operator [MAPJOIN_84]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col2 (type: string)"}
- | outputColumnNames:["_col0","_col2"]
- |<-Reducer 3 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_22]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
| Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_10]
- | outputColumnNames:["_col0"]
- | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Group By Operator [GBY_9]
+ | Group By Operator [GBY_38]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | |<-Union 2 [SIMPLE_EDGE]
- | |<-Map 1 [CONTAINS]
- | | Reduce Output Operator [RS_8]
+ | |<-Union 11 [SIMPLE_EDGE]
+ | |<-Map 13 [CONTAINS]
+ | | Reduce Output Operator [RS_37]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_7]
+ | | Group By Operator [GBY_36]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_1]
+ | | Select Operator [SEL_32]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_74]
+ | | Filter Operator [FIL_82]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_0]
- | | alias:x
- | |<-Map 4 [CONTAINS]
- | Reduce Output Operator [RS_8]
+ | | TableScan [TS_31]
+ | | alias:y
+ | |<-Map 10 [CONTAINS]
+ | Reduce Output Operator [RS_37]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Group By Operator [GBY_7]
+ | Group By Operator [GBY_36]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Select Operator [SEL_3]
+ | Select Operator [SEL_30]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_75]
+ | Filter Operator [FIL_81]
| predicate:value is not null (type: boolean)
- | TableScan [TS_2]
- | alias:y
- |<-Map Join Operator [MAPJOIN_82]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 5":"_col0 (type: string)","Map 8":"_col0 (type: string)"}
- | outputColumnNames:["_col1","_col2"]
- |<-Map 8 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_18]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Select Operator [SEL_14]
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_77]
- | predicate:(key is not null and value is not null) (type: boolean)
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_13]
+ | TableScan [TS_29]
| alias:x
- | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- |<-Select Operator [SEL_12]
+ |<-Select Operator [SEL_28]
outputColumnNames:["_col0"]
- Filter Operator [FIL_76]
+ Filter Operator [FIL_80]
predicate:key is not null (type: boolean)
- TableScan [TS_11]
+ TableScan [TS_27]
alias:y
PREHOOK: query: explain
SELECT x.key, y.value
@@ -1850,42 +1850,42 @@
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS)
-Reducer 13 <- Union 12 (SIMPLE_EDGE), Union 14 (CONTAINS)
-Map 30 <- Map 31 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 8 (CONTAINS)
-Map 11 <- Union 12 (CONTAINS)
-Reducer 24 <- Union 23 (SIMPLE_EDGE), Union 25 (CONTAINS)
-Map 1 <- Union 2 (CONTAINS)
-Map 20 <- Union 21 (CONTAINS)
-Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS)
-Reducer 9 <- Union 8 (SIMPLE_EDGE)
-Reducer 26 <- Union 25 (SIMPLE_EDGE)
-Map 16 <- Union 12 (CONTAINS)
-Map 29 <- Union 25 (CONTAINS)
-Map 28 <- Union 23 (CONTAINS)
-Reducer 15 <- Union 14 (SIMPLE_EDGE)
-Map 18 <- Map 19 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Union 6 (CONTAINS)
-Map 27 <- Union 21 (CONTAINS)
-Map 17 <- Union 14 (CONTAINS)
-Reducer 3 <- Union 2 (SIMPLE_EDGE)
-Map 5 <- Map 10 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS)
-Map 4 <- Union 2 (CONTAINS)
+Map 12 <- Union 13 (CONTAINS)
+Map 30 <- Union 26 (CONTAINS)
+Reducer 23 <- Union 22 (SIMPLE_EDGE), Union 24 (CONTAINS)
+Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS)
+Map 11 <- Reducer 16 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS)
+Map 21 <- Union 22 (CONTAINS)
+Map 1 <- Reducer 8 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Map 20 <- Reducer 27 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Reducer 5 <- Union 4 (SIMPLE_EDGE)
+Map 29 <- Union 24 (CONTAINS)
+Reducer 8 <- Map 10 (BROADCAST_EDGE), Union 7 (SIMPLE_EDGE)
+Reducer 27 <- Map 31 (BROADCAST_EDGE), Union 26 (SIMPLE_EDGE)
+Map 28 <- Union 22 (CONTAINS)
+Map 18 <- Union 15 (CONTAINS)
+Reducer 16 <- Map 19 (BROADCAST_EDGE), Union 15 (SIMPLE_EDGE)
+Map 17 <- Union 13 (CONTAINS)
+Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+Map 6 <- Union 7 (CONTAINS)
+Map 9 <- Union 7 (CONTAINS)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 9
+ Reducer 5
File Output Operator [FS_122]
compressed:false
- Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Group By Operator [GBY_120]
| keys:KEY._col0 (type: string), KEY._col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
- |<-Union 8 [SIMPLE_EDGE]
- |<-Map 30 [CONTAINS]
+ | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 4 [SIMPLE_EDGE]
+ |<-Map 20 [CONTAINS]
| Reduce Output Operator [RS_119]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -1895,131 +1895,132 @@
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_114]
| outputColumnNames:["_col0","_col1"]
- | Map Join Operator [MAPJOIN_164]
+ | Map Join Operator [MAPJOIN_170]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 30":"_col3 (type: string)","Reducer 26":"_col0 (type: string)"}
- | | outputColumnNames:["_col2","_col3"]
- | |<-Reducer 26 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_110]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
+ | | keys:{"Map 20":"_col0 (type: string)","Reducer 27":"_col1 (type: string)"}
+ | | outputColumnNames:["_col1","_col3"]
+ | |<-Reducer 27 [BROADCAST_EDGE]
+ | | Reduce Output Operator [RS_112]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
- | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
- | | Select Operator [SEL_98]
+ | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE
+ | | Map Join Operator [MAPJOIN_169]
+ | | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | | keys:{"Map 31":"_col1 (type: string)","Reducer 27":"_col0 (type: string)"}
+ | | | outputColumnNames:["_col1"]
+ | | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Map 31 [BROADCAST_EDGE]
+ | | | Reduce Output Operator [RS_106]
+ | | | key expressions:_col1 (type: string)
+ | | | Map-reduce partition columns:_col1 (type: string)
+ | | | sort order:+
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | value expressions:_col0 (type: string)
+ | | | Select Operator [SEL_102]
+ | | | outputColumnNames:["_col0","_col1"]
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | Filter Operator [FIL_164]
+ | | | predicate:(value is not null and key is not null) (type: boolean)
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | TableScan [TS_101]
+ | | | alias:x
+ | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Select Operator [SEL_100]
| | outputColumnNames:["_col0"]
| | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
- | | Group By Operator [GBY_97]
+ | | Group By Operator [GBY_99]
| | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
| | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE
- | | |<-Union 25 [SIMPLE_EDGE]
- | | |<-Reducer 24 [CONTAINS]
- | | | Reduce Output Operator [RS_96]
+ | | |<-Union 26 [SIMPLE_EDGE]
+ | | |<-Map 30 [CONTAINS]
+ | | | Reduce Output Operator [RS_98]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_95]
+ | | | Group By Operator [GBY_97]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
+ | | | Select Operator [SEL_93]
+ | | | outputColumnNames:["_col0","_col1"]
+ | | | Filter Operator [FIL_163]
+ | | | predicate:value is not null (type: boolean)
+ | | | TableScan [TS_92]
+ | | | alias:y
+ | | |<-Reducer 25 [CONTAINS]
+ | | Reduce Output Operator [RS_98]
+ | | key expressions:_col0 (type: string), _col1 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ | | sort order:++
+ | | Group By Operator [GBY_97]
+ | | keys:_col0 (type: string), _col1 (type: string)
+ | | outputColumnNames:["_col0","_col1"]
+ | | Group By Operator [GBY_90]
+ | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
+ | | | outputColumnNames:["_col0","_col1"]
+ | | |<-Union 24 [SIMPLE_EDGE]
+ | | |<-Reducer 23 [CONTAINS]
+ | | | Reduce Output Operator [RS_89]
+ | | | key expressions:_col0 (type: string), _col1 (type: string)
+ | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ | | | sort order:++
| | | Group By Operator [GBY_88]
+ | | | keys:_col0 (type: string), _col1 (type: string)
+ | | | outputColumnNames:["_col0","_col1"]
+ | | | Group By Operator [GBY_81]
| | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | | outputColumnNames:["_col0","_col1"]
- | | | |<-Union 23 [SIMPLE_EDGE]
- | | | |<-Reducer 22 [CONTAINS]
- | | | | Reduce Output Operator [RS_87]
+ | | | |<-Union 22 [SIMPLE_EDGE]
+ | | | |<-Map 21 [CONTAINS]
+ | | | | Reduce Output Operator [RS_80]
| | | | key expressions:_col0 (type: string), _col1 (type: string)
| | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | | sort order:++
- | | | | Group By Operator [GBY_86]
- | | | | keys:_col0 (type: string), _col1 (type: string)
- | | | | outputColumnNames:["_col0","_col1"]
| | | | Group By Operator [GBY_79]
- | | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
- | | | | | outputColumnNames:["_col0","_col1"]
- | | | | |<-Union 21 [SIMPLE_EDGE]
- | | | | |<-Map 20 [CONTAINS]
- | | | | | Reduce Output Operator [RS_78]
- | | | | | key expressions:_col0 (type: string), _col1 (type: string)
- | | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | | | | sort order:++
- | | | | | Group By Operator [GBY_77]
- | | | | | keys:_col0 (type: string), _col1 (type: string)
- | | | | | outputColumnNames:["_col0","_col1"]
- | | | | | Select Operator [SEL_71]
- | | | | | outputColumnNames:["_col0","_col1"]
- | | | | | Filter Operator [FIL_153]
- | | | | | predicate:value is not null (type: boolean)
- | | | | | TableScan [TS_70]
- | | | | | alias:x
- | | | | |<-Map 27 [CONTAINS]
- | | | | Reduce Output Operator [RS_78]
- | | | | key expressions:_col0 (type: string), _col1 (type: string)
- | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | | | sort order:++
- | | | | Group By Operator [GBY_77]
| | | | keys:_col0 (type: string), _col1 (type: string)
| | | | outputColumnNames:["_col0","_col1"]
| | | | Select Operator [SEL_73]
| | | | outputColumnNames:["_col0","_col1"]
- | | | | Filter Operator [FIL_154]
+ | | | | Filter Operator [FIL_160]
| | | | predicate:value is not null (type: boolean)
| | | | TableScan [TS_72]
- | | | | alias:y
+ | | | | alias:x
| | | |<-Map 28 [CONTAINS]
- | | | Reduce Output Operator [RS_87]
+ | | | Reduce Output Operator [RS_80]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_86]
+ | | | Group By Operator [GBY_79]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_82]
+ | | | Select Operator [SEL_75]
| | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_155]
+ | | | Filter Operator [FIL_161]
| | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_81]
+ | | | TableScan [TS_74]
| | | alias:y
| | |<-Map 29 [CONTAINS]
- | | Reduce Output Operator [RS_96]
+ | | Reduce Output Operator [RS_89]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_95]
+ | | Group By Operator [GBY_88]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_91]
+ | | Select Operator [SEL_84]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_156]
+ | | Filter Operator [FIL_162]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_90]
+ | | TableScan [TS_83]
| | alias:y
- | |<-Map Join Operator [MAPJOIN_161]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 30":"_col0 (type: string)","Map 31":"_col0 (type: string)"}
- | | outputColumnNames:["_col1","_col2","_col3"]
- | |<-Map 31 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_106]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string)
- | | Select Operator [SEL_102]
- | | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_158]
- | | predicate:(key is not null and value is not null) (type: boolean)
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_101]
- | | alias:x
- | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | |<-Select Operator [SEL_100]
+ | |<-Select Operator [SEL_71]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_157]
+ | Filter Operator [FIL_159]
| predicate:key is not null (type: boolean)
- | TableScan [TS_99]
+ | TableScan [TS_70]
| alias:y
- |<-Reducer 7 [CONTAINS]
+ |<-Reducer 3 [CONTAINS]
Reduce Output Operator [RS_119]
key expressions:_col0 (type: string), _col1 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -2030,8 +2031,8 @@
Group By Operator [GBY_68]
| keys:KEY._col0 (type: string), KEY._col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- |<-Union 6 [SIMPLE_EDGE]
- |<-Map 18 [CONTAINS]
+ |<-Union 2 [SIMPLE_EDGE]
+ |<-Map 11 [CONTAINS]
| Reduce Output Operator [RS_67]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -2041,105 +2042,106 @@
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_62]
| outputColumnNames:["_col0","_col1"]
- | Map Join Operator [MAPJOIN_163]
+ | Map Join Operator [MAPJOIN_168]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Reducer 15":"_col0 (type: string)","Map 18":"_col3 (type: string)"}
- | | outputColumnNames:["_col2","_col3"]
- | |<-Reducer 15 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_58]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
+ | | keys:{"Map 11":"_col0 (type: string)","Reducer 16":"_col1 (type: string)"}
+ | | outputColumnNames:["_col1","_col3"]
+ | |<-Reducer 16 [BROADCAST_EDGE]
+ | | Reduce Output Operator [RS_60]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
- | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | | Select Operator [SEL_46]
+ | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ | | Map Join Operator [MAPJOIN_167]
+ | | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | | keys:{"Map 19":"_col1 (type: string)","Reducer 16":"_col0 (type: string)"}
+ | | | outputColumnNames:["_col1"]
+ | | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Map 19 [BROADCAST_EDGE]
+ | | | Reduce Output Operator [RS_54]
+ | | | key expressions:_col1 (type: string)
+ | | | Map-reduce partition columns:_col1 (type: string)
+ | | | sort order:+
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | value expressions:_col0 (type: string)
+ | | | Select Operator [SEL_50]
+ | | | outputColumnNames:["_col0","_col1"]
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | Filter Operator [FIL_158]
+ | | | predicate:(value is not null and key is not null) (type: boolean)
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | TableScan [TS_49]
+ | | | alias:x
+ | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Select Operator [SEL_48]
| | outputColumnNames:["_col0"]
| | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | | Group By Operator [GBY_45]
+ | | Group By Operator [GBY_47]
| | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
| | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | | |<-Union 14 [SIMPLE_EDGE]
- | | |<-Reducer 13 [CONTAINS]
- | | | Reduce Output Operator [RS_44]
+ | | |<-Union 15 [SIMPLE_EDGE]
+ | | |<-Reducer 14 [CONTAINS]
+ | | | Reduce Output Operator [RS_46]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_43]
+ | | | Group By Operator [GBY_45]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Group By Operator [GBY_36]
+ | | | Group By Operator [GBY_38]
| | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | | outputColumnNames:["_col0","_col1"]
- | | | |<-Union 12 [SIMPLE_EDGE]
- | | | |<-Map 11 [CONTAINS]
- | | | | Reduce Output Operator [RS_35]
+ | | | |<-Union 13 [SIMPLE_EDGE]
+ | | | |<-Map 12 [CONTAINS]
+ | | | | Reduce Output Operator [RS_37]
| | | | key expressions:_col0 (type: string), _col1 (type: string)
| | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | | sort order:++
- | | | | Group By Operator [GBY_34]
+ | | | | Group By Operator [GBY_36]
| | | | keys:_col0 (type: string), _col1 (type: string)
| | | | outputColumnNames:["_col0","_col1"]
- | | | | Select Operator [SEL_28]
+ | | | | Select Operator [SEL_30]
| | | | outputColumnNames:["_col0","_col1"]
- | | | | Filter Operator [FIL_148]
+ | | | | Filter Operator [FIL_155]
| | | | predicate:value is not null (type: boolean)
- | | | | TableScan [TS_27]
+ | | | | TableScan [TS_29]
| | | | alias:x
- | | | |<-Map 16 [CONTAINS]
- | | | Reduce Output Operator [RS_35]
+ | | | |<-Map 17 [CONTAINS]
+ | | | Reduce Output Operator [RS_37]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_34]
+ | | | Group By Operator [GBY_36]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_30]
+ | | | Select Operator [SEL_32]
| | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_149]
+ | | | Filter Operator [FIL_156]
| | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_29]
+ | | | TableScan [TS_31]
| | | alias:y
- | | |<-Map 17 [CONTAINS]
- | | Reduce Output Operator [RS_44]
+ | | |<-Map 18 [CONTAINS]
+ | | Reduce Output Operator [RS_46]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_43]
+ | | Group By Operator [GBY_45]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_39]
+ | | Select Operator [SEL_41]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_150]
+ | | Filter Operator [FIL_157]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_38]
+ | | TableScan [TS_40]
| | alias:y
- | |<-Map Join Operator [MAPJOIN_160]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 19":"_col0 (type: string)","Map 18":"_col0 (type: string)"}
- | | outputColumnNames:["_col1","_col2","_col3"]
- | |<-Map 19 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_54]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string)
- | | Select Operator [SEL_50]
- | | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_152]
- | | predicate:(key is not null and value is not null) (type: boolean)
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_49]
- | | alias:x
- | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | |<-Select Operator [SEL_48]
+ | |<-Select Operator [SEL_28]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_151]
+ | Filter Operator [FIL_154]
| predicate:key is not null (type: boolean)
- | TableScan [TS_47]
+ | TableScan [TS_27]
| alias:y
- |<-Map 5 [CONTAINS]
+ |<-Map 1 [CONTAINS]
Reduce Output Operator [RS_67]
key expressions:_col0 (type: string), _col1 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -2149,77 +2151,78 @@
outputColumnNames:["_col0","_col1"]
Select Operator [SEL_26]
outputColumnNames:["_col0","_col1"]
- Map Join Operator [MAPJOIN_162]
+ Map Join Operator [MAPJOIN_166]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col3 (type: string)"}
- | outputColumnNames:["_col2","_col3"]
- |<-Reducer 3 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_22]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
+ | keys:{"Map 1":"_col0 (type: string)","Reducer 8":"_col1 (type: string)"}
+ | outputColumnNames:["_col1","_col3"]
+ |<-Reducer 8 [BROADCAST_EDGE]
+ | Reduce Output Operator [RS_24]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
| sort order:+
- | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_10]
+ | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | Map Join Operator [MAPJOIN_165]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"Map 10":"_col1 (type: string)","Reducer 8":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 10 [BROADCAST_EDGE]
+ | | Reduce Output Operator [RS_18]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Select Operator [SEL_14]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_153]
+ | | predicate:(value is not null and key is not null) (type: boolean)
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_13]
+ | | alias:x
+ | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | |<-Select Operator [SEL_12]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Group By Operator [GBY_9]
+ | Group By Operator [GBY_11]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | |<-Union 2 [SIMPLE_EDGE]
- | |<-Map 1 [CONTAINS]
- | | Reduce Output Operator [RS_8]
+ | |<-Union 7 [SIMPLE_EDGE]
+ | |<-Map 6 [CONTAINS]
+ | | Reduce Output Operator [RS_10]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_7]
+ | | Group By Operator [GBY_9]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_1]
+ | | Select Operator [SEL_3]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_144]
+ | | Filter Operator [FIL_151]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_0]
+ | | TableScan [TS_2]
| | alias:x
- | |<-Map 4 [CONTAINS]
- | Reduce Output Operator [RS_8]
+ | |<-Map 9 [CONTAINS]
+ | Reduce Output Operator [RS_10]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Group By Operator [GBY_7]
+ | Group By Operator [GBY_9]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Select Operator [SEL_3]
+ | Select Operator [SEL_5]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_145]
+ | Filter Operator [FIL_152]
| predicate:value is not null (type: boolean)
- | TableScan [TS_2]
+ | TableScan [TS_4]
| alias:y
- |<-Map Join Operator [MAPJOIN_159]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 10":"_col0 (type: string)","Map 5":"_col0 (type: string)"}
- | outputColumnNames:["_col1","_col2","_col3"]
- |<-Map 10 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_18]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string)
- | Select Operator [SEL_14]
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_147]
- | predicate:(key is not null and value is not null) (type: boolean)
- | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_13]
- | alias:x
- | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- |<-Select Operator [SEL_12]
+ |<-Select Operator [SEL_1]
outputColumnNames:["_col0","_col1"]
- Filter Operator [FIL_146]
+ Filter Operator [FIL_150]
predicate:key is not null (type: boolean)
- TableScan [TS_11]
+ TableScan [TS_0]
alias:y
PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
@@ -2782,308 +2785,351 @@
Plan optimized by CBO.
Vertex dependency in root stage
-Map 12 <- Union 13 (CONTAINS)
-Map 14 <- Union 13 (CONTAINS)
-Map 21 <- Map 20 (BROADCAST_EDGE)
-Map 1 <- Union 2 (CONTAINS)
-Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 13 (SIMPLE_EDGE), Union 4 (CONTAINS)
-Map 19 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS)
-Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-Map 16 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS)
-Map 15 <- Union 13 (CONTAINS)
-Map 18 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS)
-Map 17 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS)
-Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
-Map 5 <- Union 2 (CONTAINS)
-Map 6 <- Map 7 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 5 (SIMPLE_EDGE)
+Map 12 <- Union 9 (CONTAINS)
+Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Map 13 <- Union 9 (CONTAINS)
+Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
+Map 4 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS)
+Map 19 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS)
+Map 6 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS)
+Map 16 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS)
+Map 8 <- Union 9 (CONTAINS)
+Map 18 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS)
+Map 17 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Union 4
- |<-Reducer 10 [CONTAINS]
- | File Output Operator [FS_77]
+ Union 3
+ |<-Reducer 2 [CONTAINS]
+ | File Output Operator [FS_76]
| compressed:false
| table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- | Select Operator [SEL_45]
+ | Select Operator [SEL_21]
| outputColumnNames:["_col0","_col1"]
- | Merge Join Operator [MERGEJOIN_118]
+ | Merge Join Operator [MERGEJOIN_120]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"}
- | | outputColumnNames:["_col0","_col3"]
- | |<-Reducer 9 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_41]
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1","_col3"]
+ | |<-Map 1 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_17]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col1 (type: string)
+ | | Select Operator [SEL_1]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_104]
+ | | predicate:key is not null (type: boolean)
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_0]
+ | | alias:y
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | |<-Union 5 [SIMPLE_EDGE]
+ | |<-Map 4 [CONTAINS]
+ | | Reduce Output Operator [RS_19]
| | key expressions:_col1 (type: string)
| | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
- | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col0 (type: string), _col3 (type: string)
- | | Merge Join Operator [MERGEJOIN_115]
+ | | Map Join Operator [MAPJOIN_119]
| | | condition map:[{"":"Inner Join 0 to 1"}]
- | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
- | | | outputColumnNames:["_col0","_col1","_col3"]
- | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | |<-Map 11 [SIMPLE_EDGE]
- | | | Reduce Output Operator [RS_38]
- | | | key expressions:_col0 (type: string)
- | | | Map-reduce partition columns:_col0 (type: string)
+ | | | keys:{"Map 4":"_col0 (type: string)","Map 7":"_col1 (type: string)"}
+ | | | outputColumnNames:["_col1"]
+ | | |<-Map 7 [BROADCAST_EDGE]
+ | | | Reduce Output Operator [RS_13]
+ | | | key expressions:_col1 (type: string)
+ | | | Map-reduce partition columns:_col1 (type: string)
| | | sort order:+
- | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | value expressions:_col1 (type: string)
- | | | Select Operator [SEL_25]
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | value expressions:_col0 (type: string)
+ | | | Select Operator [SEL_9]
| | | outputColumnNames:["_col0","_col1"]
- | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | Filter Operator [FIL_104]
- | | | predicate:key is not null (type: boolean)
- | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | TableScan [TS_24]
- | | | alias:y
- | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | | |<-Map 8 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_36]
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | Filter Operator [FIL_107]
+ | | | predicate:(value is not null and key is not null) (type: boolean)
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | TableScan [TS_8]
+ | | | alias:x
+ | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | | | Reduce Output Operator [RS_125]
+ | | | key expressions:_col1 (type: string)
+ | | | Map-reduce partition columns:_col1 (type: string)
+ | | | sort order:+
+ | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | | value expressions:_col0 (type: string)
+ | | | Please refer to the previous Select Operator [SEL_9]
+ | | |<-Select Operator [SEL_3]
+ | | outputColumnNames:["_col0"]
+ | | Filter Operator [FIL_105]
+ | | predicate:value is not null (type: boolean)
+ | | TableScan [TS_2]
+ | | alias:x
+ | |<-Map 6 [CONTAINS]
+ | Reduce Output Operator [RS_19]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
+ | Map Join Operator [MAPJOIN_119]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"Map 7":"_col1 (type: string)","Map 6":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | |<- Please refer to the previous Map 7 [BROADCAST_EDGE]
+ | |<-Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Filter Operator [FIL_106]
+ | predicate:value is not null (type: boolean)
+ | TableScan [TS_4]
+ | alias:y
+ |<-Reducer 11 [CONTAINS]
+ | File Output Operator [FS_76]
+ | compressed:false
+ | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+ | Select Operator [SEL_45]
+ | outputColumnNames:["_col0","_col1"]
+ | Merge Join Operator [MERGEJOIN_122]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"}
+ | | outputColumnNames:["_col1","_col4"]
+ | |<-Map 15 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_43]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
- | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| | value expressions:_col1 (type: string)
- | | Select Operator [SEL_23]
+ | | Select Operator [SEL_34]
| | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_112]
+ | | predicate:key is not null (type: boolean)
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_33]
+ | | alias:y
+ | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | |<-Reducer 10 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_41]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE
+ | Merge Join Operator [MERGEJOIN_121]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 14 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_38]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
| | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_103]
- | | predicate:(key is not null and value is not null) (type: boolean)
+ | | value expressions:_col0 (type: string)
+ | | Select Operator [SEL_32]
+ | | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_22]
+ | | Filter Operator [FIL_111]
+ | | predicate:(value is not null and key is not null) (type: boolean)
+ | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_31]
| | alias:y
| | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | |<-Union 13 [SIMPLE_EDGE]
+ | |<-Union 9 [SIMPLE_EDGE]
| |<-Map 12 [CONTAINS]
- | | Reduce Output Operator [RS_43]
+ | | Reduce Output Operator [RS_36]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
- | | Select Operator [SEL_27]
+ | | Select Operator [SEL_25]
| | outputColumnNames:["_col0"]
- | | Filter Operator [FIL_105]
+ | | Filter Operator [FIL_109]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_26]
- | | alias:x
- | |<-Map 14 [CONTAINS]
- | | Reduce Output Operator [RS_43]
+ | | TableScan [TS_24]
+ | | alias:y
+ | |<-Map 13 [CONTAINS]
+ | | Reduce Output Operator [RS_36]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
| | Select Operator [SEL_29]
| | outputColumnNames:["_col0"]
- | | Filter Operator [FIL_106]
+ | | Filter Operator [FIL_110]
| | predicate:value is not null (type: boolean)
| | TableScan [TS_28]
| | alias:y
- | |<-Map 15 [CONTAINS]
- | Reduce Output Operator [RS_43]
+ | |<-Map 8 [CONTAINS]
+ | Reduce Output Operator [RS_36]
| key expressions:_col0 (type: string)
| Map-reduce partition columns:_col0 (type: string)
| sort order:+
- | Select Operator [SEL_33]
+ | Select Operator [SEL_23]
| outputColumnNames:["_col0"]
- | Filter Operator [FIL_107]
+ | Filter Operator [FIL_108]
| predicate:value is not null (type: boolean)
- | TableScan [TS_32]
- | alias:y
+ | TableScan [TS_22]
+ | alias:x
|<-Map 19 [CONTAINS]
- | File Output Operator [FS_77]
+ | File Output Operator [FS_76]
| compressed:false
| table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- | Select Operator [SEL_75]
+ | Select Operator [SEL_74]
| outputColumnNames:["_col0","_col1"]
- | Map Join Operator [MAPJOIN_119]
+ | Map Join Operator [MAPJOIN_124]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 21":"_col1 (type: string)","Map 19":"_col0 (type: string)"}
+ | | keys:{"Map 21":"_col0 (type: string)","Map 19":"_col1 (type: string)"}
| | outputColumnNames:["_col1","_col4"]
| |<-Map 21 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_73]
- | | key expressions:_col1 (type: string)
- | | Map-reduce partition columns:_col1 (type: string)
+ | | Reduce Output Operator [RS_72]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
- | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col0 (type: string), _col3 (type: string)
- | | Map Join Operator [MAPJOIN_116]
- | | | condition map:[{"":"Inner Join 0 to 1"}]
- | | | keys:{"Map 21":"_col0 (type: string)","Map 20":"_col0 (type: string)"}
- | | | outputColumnNames:["_col0","_col1","_col3"]
- | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- | | |<-Map 20 [BROADCAST_EDGE]
- | | | Reduce Output Operator [RS_65]
- | | | key expressions:_col0 (type: string)
- | | | Map-reduce partition columns:_col0 (type: string)
- | | | sort order:+
- | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | | value expressions:_col1 (type: string)
- | | | Select Operator [SEL_61]
- | | | outputColumnNames:["_col0","_col1"]
- | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | | Filter Operator [FIL_112]
- | | | predicate:(key is not null and value is not null) (type: boolean)
- | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | | TableScan [TS_60]
- | | | alias:x
- | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | | |<-Select Operator [SEL_63]
+ | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col1 (type: string)
+ | | Select Operator [SEL_63]
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_113]
+ | | Filter Operator [FIL_118]
| | predicate:key is not null (type: boolean)
| | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
| | TableScan [TS_62]
| | alias:x
| | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | | Reduce Output Operator [RS_122]
+ | | Reduce Output Operator [RS_131]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col1 (type: string)
+ | | Please refer to the previous Select Operator [SEL_63]
+ | | Reduce Output Operator [RS_132]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col1 (type: string)
+ | | Please refer to the previous Select Operator [SEL_63]
+ | | Reduce Output Operator [RS_133]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col1 (type: string)
+ | | Please refer to the previous Select Operator [SEL_63]
+ | |<-Map Join Operator [MAPJOIN_123]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"Map 20":"_col1 (type: string)","Map 19":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | |<-Map 20 [BROADCAST_EDGE]
+ | | Reduce Output Operator [RS_67]
| | key expressions:_col1 (type: string)
| | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
- | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col0 (type: string), _col3 (type: string)
- | | Please refer to the previous Map Join Operator [MAPJOIN_116]
- | | Reduce Output Operator [RS_123]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Select Operator [SEL_61]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_117]
+ | | predicate:(value is not null and key is not null) (type: boolean)
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_60]
+ | | alias:x
+ | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | | Reduce Output Operator [RS_127]
| | key expressions:_col1 (type: string)
| | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
- | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col0 (type: string), _col3 (type: string)
- | | Please refer to the previous Map Join Operator [MAPJOIN_116]
- | | Reduce Output Operator [RS_124]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Please refer to the previous Select Operator [SEL_61]
+ | | Reduce Output Operator [RS_128]
| | key expressions:_col1 (type: string)
| | Map-reduce partition columns:_col1 (type: string)
| | sort order:+
- | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col0 (type: string), _col3 (type: string)
- | | Please refer to the previous Map Join Operator [MAPJOIN_116]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Please refer to the previous Select Operator [SEL_61]
+ | | Reduce Output Operator [RS_129]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Please refer to the previous Select Operator [SEL_61]
| |<-Select Operator [SEL_58]
| outputColumnNames:["_col0"]
- | Filter Operator [FIL_111]
+ | Filter Operator [FIL_116]
| predicate:value is not null (type: boolean)
| TableScan [TS_57]
| alias:y
|<-Map 16 [CONTAINS]
- | File Output Operator [FS_77]
+ | File Output Operator [FS_76]
| compressed:false
| table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- | Select Operator [SEL_75]
+ | Select Operator [SEL_74]
| outputColumnNames:["_col0","_col1"]
- | Map Join Operator [MAPJOIN_119]
+ | Map Join Operator [MAPJOIN_124]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 21":"_col1 (type: string)","Map 16":"_col0 (type: string)"}
+ | | keys:{"Map 21":"_col0 (type: string)","Map 16":"_col1 (type: string)"}
| | outputColumnNames:["_col1","_col4"]
| |<- Please refer to the previous Map 21 [BROADCAST_EDGE]
+ | |<-Map Join Operator [MAPJOIN_123]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"Map 20":"_col1 (type: string)","Map 16":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | |<- Please refer to the previous Map 20 [BROADCAST_EDGE]
| |<-Select Operator [SEL_49]
| outputColumnNames:["_col0"]
- | Filter Operator [FIL_108]
+ | Filter Operator [FIL_113]
| predicate:value is not null (type: boolean)
| TableScan [TS_48]
| alias:x
|<-Map 18 [CONTAINS]
- | File Output Operator [FS_77]
+ | File Output Operator [FS_76]
| compressed:false
| table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- | Select Operator [SEL_75]
+ | Select Operator [SEL_74]
| outputColumnNames:["_col0","_col1"]
- | Map Join Operator [MAPJOIN_119]
+ | Map Join Operator [MAPJOIN_124]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 21":"_col1 (type: string)","Map 18":"_col0 (type: string)"}
+ | | keys:{"Map 21":"_col0 (type: string)","Map 18":"_col1 (type: string)"}
| | outputColumnNames:["_col1","_col4"]
| |<- Please refer to the previous Map 21 [BROADCAST_EDGE]
+ | |<-Map Join Operator [MAPJOIN_123]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"Map 20":"_col1 (type: string)","Map 18":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | |<- Please refer to the previous Map 20 [BROADCAST_EDGE]
| |<-Select Operator [SEL_55]
| outputColumnNames:["_col0"]
- | Filter Operator [FIL_110]
+ | Filter Operator [FIL_115]
| predicate:value is not null (type: boolean)
| TableScan [TS_54]
| alias:y
|<-Map 17 [CONTAINS]
- | File Output Operator [FS_77]
- | compressed:false
- | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- | Select Operator [SEL_75]
- | outputColumnNames:["_col0","_col1"]
- | Map Join Operator [MAPJOIN_119]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 21":"_col1 (type: string)","Map 17":"_col0 (type: string)"}
- | | outputColumnNames:["_col1","_col4"]
- | |<- Please refer to the previous Map 21 [BROADCAST_EDGE]
- | |<-Select Operator [SEL_51]
- | outputColumnNames:["_col0"]
- | Filter Operator [FIL_109]
- | predicate:value is not null (type: boolean)
- | TableScan [TS_50]
- | alias:y
- |<-Reducer 3 [CONTAINS]
- File Output Operator [FS_77]
+ File Output Operator [FS_76]
compressed:false
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Select Operator [SEL_21]
+ Select Operator [SEL_74]
outputColumnNames:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_117]
+ Map Join Operator [MAPJOIN_124]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"}
- | outputColumnNames:["_col2","_col3"]
- |<-Map 6 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_19]
- | key expressions:_col3 (type: string)
- | Map-reduce partition columns:_col3 (type: string)
- | sort order:+
- | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col1 (type: string), _col2 (type: string)
- | Map Join Operator [MAPJOIN_114]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 7":"_col0 (type: string)","Map 6":"_col0 (type: string)"}
- | | outputColumnNames:["_col1","_col2","_col3"]
- | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 7 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_13]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string)
- | | Select Operator [SEL_9]
- | | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_102]
- | | predicate:(key is not null and value is not null) (type: boolean)
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_8]
- | | alias:x
- | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | |<-Select Operator [SEL_7]
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_101]
- | predicate:key is not null (type: boolean)
- | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_6]
- | alias:y
- | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- |<-Union 2 [SIMPLE_EDGE]
- |<-Map 1 [CONTAINS]
- | Reduce Output Operator [RS_17]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
- | Select Operator [SEL_1]
- | outputColumnNames:["_col0"]
- | Filter Operator [FIL_99]
- | predicate:value is not null (type: boolean)
- | TableScan [TS_0]
- | alias:x
- |<-Map 5 [CONTAINS]
- Reduce Output Operator [RS_17]
- key expressions:_col0 (type: string)
- Map-reduce partition columns:_col0 (type: string)
- sort order:+
- Select Operator [SEL_3]
+ | keys:{"Map 21":"_col0 (type: string)","Map 17":"_col1 (type: string)"}
+ | outputColumnNames:["_col1","_col4"]
+ |<- Please refer to the previous Map 21 [BROADCAST_EDGE]
+ |<-Map Join Operator [MAPJOIN_123]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"Map 20":"_col1 (type: string)","Map 17":"_col0 (type: string)"}
+ | outputColumnNames:["_col1"]
+ |<- Please refer to the previous Map 20 [BROADCAST_EDGE]
+ |<-Select Operator [SEL_51]
outputColumnNames:["_col0"]
- Filter Operator [FIL_100]
+ Filter Operator [FIL_114]
predicate:value is not null (type: boolean)
- TableScan [TS_2]
+ TableScan [TS_50]
alias:y
PREHOOK: query: explain
SELECT x.key, y.value
@@ -3114,56 +3160,54 @@
Plan optimized by CBO.
Vertex dependency in root stage
+Map 12 <- Union 13 (CONTAINS)
Map 30 <- Union 24 (CONTAINS)
-Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE)
-Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS)
Map 23 <- Union 24 (CONTAINS)
Map 32 <- Union 28 (CONTAINS)
Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS)
-Map 22 <- Union 19 (CONTAINS)
Map 31 <- Union 26 (CONTAINS)
-Map 21 <- Union 17 (CONTAINS)
-Map 34 <- Map 33 (BROADCAST_EDGE)
-Map 1 <- Union 2 (CONTAINS)
-Reducer 20 <- Union 19 (SIMPLE_EDGE)
-Map 10 <- Map 11 (BROADCAST_EDGE)
-Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
-Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS)
-Map 16 <- Union 17 (CONTAINS)
-Reducer 8 <- Union 7 (SIMPLE_EDGE)
+Map 20 <- Union 15 (CONTAINS)
+Map 10 <- Union 8 (CONTAINS)
+Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Map 19 <- Union 13 (CONTAINS)
+Reducer 6 <- Union 5 (SIMPLE_EDGE)
+Reducer 9 <- Map 11 (BROADCAST_EDGE), Union 8 (SIMPLE_EDGE)
+Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS)
-Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS)
-Reducer 29 <- Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS)
-Reducer 3 <- Union 2 (SIMPLE_EDGE)
-Map 9 <- Union 2 (CONTAINS)
+Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 29 <- Map 33 (BROADCAST_EDGE), Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 16 <- Union 15 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Map 7 <- Union 8 (CONTAINS)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 8
- File Output Operator [FS_121]
+ Reducer 6
+ File Output Operator [FS_120]
compressed:false
- Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
- Group By Operator [GBY_119]
+ Group By Operator [GBY_118]
| keys:KEY._col0 (type: string), KEY._col1 (type: string)
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE
- |<-Union 7 [SIMPLE_EDGE]
- |<-Reducer 6 [CONTAINS]
- | Reduce Output Operator [RS_118]
+ | Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE
+ |<-Union 5 [SIMPLE_EDGE]
+ |<-Reducer 4 [CONTAINS]
+ | Reduce Output Operator [RS_117]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Group By Operator [GBY_117]
+ | Group By Operator [GBY_116]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
| Group By Operator [GBY_67]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | |<-Union 5 [SIMPLE_EDGE]
- | |<-Reducer 14 [CONTAINS]
+ | |<-Union 3 [SIMPLE_EDGE]
+ | |<-Reducer 18 [CONTAINS]
| | Reduce Output Operator [RS_66]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -3173,123 +3217,122 @@
| | outputColumnNames:["_col0","_col1"]
| | Select Operator [SEL_61]
| | outputColumnNames:["_col0","_col1"]
- | | Merge Join Operator [MERGEJOIN_162]
+ | | Merge Join Operator [MERGEJOIN_166]
| | | condition map:[{"":"Inner Join 0 to 1"}]
| | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"}
- | | | outputColumnNames:["_col0","_col3"]
- | | |<-Reducer 13 [SIMPLE_EDGE]
- | | | Reduce Output Operator [RS_57]
- | | | key expressions:_col1 (type: string)
- | | | Map-reduce partition columns:_col1 (type: string)
- | | | sort order:+
- | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | | value expressions:_col0 (type: string), _col3 (type: string)
- | | | Merge Join Operator [MERGEJOIN_159]
- | | | | condition map:[{"":"Inner Join 0 to 1"}]
- | | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
- | | | | outputColumnNames:["_col0","_col1","_col3"]
- | | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | | |<-Map 12 [SIMPLE_EDGE]
- | | | | Reduce Output Operator [RS_52]
- | | | | key expressions:_col0 (type: string)
- | | | | Map-reduce partition columns:_col0 (type: string)
- | | | | sort order:+
- | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | | | | value expressions:_col1 (type: string)
- | | | | Select Operator [SEL_28]
- | | | | outputColumnNames:["_col0","_col1"]
- | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | | | | Filter Operator [FIL_147]
- | | | | predicate:(key is not null and value is not null) (type: boolean)
- | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- | | | | TableScan [TS_27]
- | | | | alias:y
- | | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | | | |<-Map 15 [SIMPLE_EDGE]
- | | | Reduce Output Operator [RS_54]
+ | | | outputColumnNames:["_col1","_col4"]
+ | | |<-Map 22 [SIMPLE_EDGE]
+ | | | Reduce Output Operator [RS_59]
| | | key expressions:_col0 (type: string)
| | | Map-reduce partition columns:_col0 (type: string)
| | | sort order:+
| | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
| | | value expressions:_col1 (type: string)
- | | | Select Operator [SEL_30]
+ | | | Select Operator [SEL_50]
| | | outputColumnNames:["_col0","_col1"]
| | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | Filter Operator [FIL_148]
+ | | | Filter Operator [FIL_156]
| | | predicate:key is not null (type: boolean)
| | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | | TableScan [TS_29]
+ | | | TableScan [TS_49]
| | | alias:y
| | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | | |<-Reducer 20 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_59]
+ | | |<-Reducer 17 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_57]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ | | Merge Join Operator [MERGEJOIN_165]
+ | | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | | outputColumnNames:["_col1"]
+ | | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Map 21 [SIMPLE_EDGE]
+ | | | Reduce Output Operator [RS_54]
+ | | | key expressions:_col1 (type: string)
+ | | | Map-reduce partition columns:_col1 (type: string)
+ | | | sort order:+
+ | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | | value expressions:_col0 (type: string)
+ | | | Select Operator [SEL_48]
+ | | | outputColumnNames:["_col0","_col1"]
+ | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | | Filter Operator [FIL_155]
+ | | | predicate:(value is not null and key is not null) (type: boolean)
+ | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ | | | TableScan [TS_47]
+ | | | alias:y
+ | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ | | |<-Reducer 16 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_52]
| | key expressions:_col0 (type: string)
| | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
| | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | | Select Operator [SEL_50]
+ | | Select Operator [SEL_46]
| | outputColumnNames:["_col0"]
| | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | | Group By Operator [GBY_49]
+ | | Group By Operator [GBY_45]
| | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
| | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE
- | | |<-Union 19 [SIMPLE_EDGE]
- | | |<-Map 22 [CONTAINS]
- | | | Reduce Output Operator [RS_48]
+ | | |<-Union 15 [SIMPLE_EDGE]
+ | | |<-Reducer 14 [CONTAINS]
+ | | | Reduce Output Operator [RS_44]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_47]
+ | | | Group By Operator [GBY_43]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_43]
- | | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_151]
- | | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_42]
- | | | alias:y
- | | |<-Reducer 18 [CONTAINS]
- | | Reduce Output Operator [RS_48]
- | | key expressions:_col0 (type: string), _col1 (type: string)
- | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
- | | sort order:++
- | | Group By Operator [GBY_47]
- | | keys:_col0 (type: string), _col1 (type: string)
- | | outputColumnNames:["_col0","_col1"]
- | | Group By Operator [GBY_40]
- | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
- | | | outputColumnNames:["_col0","_col1"]
- | | |<-Union 17 [SIMPLE_EDGE]
- | | |<-Map 21 [CONTAINS]
- | | | Reduce Output Operator [RS_39]
+ | | | Group By Operator [GBY_36]
+ | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string)
+ | | | | outputColumnNames:["_col0","_col1"]
+ | | | |<-Union 13 [SIMPLE_EDGE]
+ | | | |<-Map 12 [CONTAINS]
+ | | | | Reduce Output Operator [RS_35]
+ | | | | key expressions:_col0 (type: string), _col1 (type: string)
+ | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
+ | | | | sort order:++
+ | | | | Group By Operator [GBY_34]
+ | | | | keys:_col0 (type: string), _col1 (type: string)
+ | | | | outputColumnNames:["_col0","_col1"]
+ | | | | Select Operator [SEL_28]
+ | | | | outputColumnNames:["_col0","_col1"]
+ | | | | Filter Operator [FIL_152]
+ | | | | predicate:value is not null (type: boolean)
+ | | | | TableScan [TS_27]
+ | | | | alias:x
+ | | | |<-Map 19 [CONTAINS]
+ | | | Reduce Output Operator [RS_35]
| | | key expressions:_col0 (type: string), _col1 (type: string)
| | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | | sort order:++
- | | | Group By Operator [GBY_38]
+ | | | Group By Operator [GBY_34]
| | | keys:_col0 (type: string), _col1 (type: string)
| | | outputColumnNames:["_col0","_col1"]
- | | | Select Operator [SEL_34]
+ | | | Select Operator [SEL_30]
| | | outputColumnNames:["_col0","_col1"]
- | | | Filter Operator [FIL_150]
+ | | | Filter Operator [FIL_153]
| | | predicate:value is not null (type: boolean)
- | | | TableScan [TS_33]
+ | | | TableScan [TS_29]
| | | alias:y
- | | |<-Map 16 [CONTAINS]
- | | Reduce Output Operator [RS_39]
+ | | |<-Map 20 [CONTAINS]
+ | | Reduce Output Operator [RS_44]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_38]
+ | | Group By Operator [GBY_43]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_32]
+ | | Select Operator [SEL_39]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_149]
+ | | Filter Operator [FIL_154]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_31]
- | | alias:x
- | |<-Reducer 4 [CONTAINS]
+ | | TableScan [TS_38]
+ | | alias:y
+ | |<-Reducer 2 [CONTAINS]
| Reduce Output Operator [RS_66]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
@@ -3299,140 +3342,139 @@
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_26]
| outputColumnNames:["_col0","_col1"]
- | Merge Join Operator [MERGEJOIN_161]
+ | Merge Join Operator [MERGEJOIN_164]
| | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"}
- | | outputColumnNames:["_col2","_col3"]
- | |<-Map 10 [SIMPLE_EDGE]
- | | Reduce Output Operator [RS_24]
- | | key expressions:_col3 (type: string)
- | | Map-reduce partition columns:_col3 (type: string)
+ | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1","_col3"]
+ | |<-Map 1 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_22]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
| | sort order:+
- | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string), _col2 (type: string)
- | | Map Join Operator [MAPJOIN_158]
- | | | condition map:[{"":"Inner Join 0 to 1"}]
- | | | keys:{"Map 11":"_col0 (type: string)","Map 10":"_col0 (type: string)"}
- | | | outputColumnNames:["_col1","_col2","_col3"]
- | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- | | |<-Map 11 [BROADCAST_EDGE]
- | | | Reduce Output Operator [RS_18]
- | | | key expressions:_col0 (type: string)
- | | | Map-reduce partition columns:_col0 (type: string)
- | | | sort order:+
- | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | | value expressions:_col1 (type: string)
- | | | Select Operator [SEL_14]
- | | | outputColumnNames:["_col0","_col1"]
- | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | | Filter Operator [FIL_146]
- | | | predicate:(key is not null and value is not null) (type: boolean)
- | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | | TableScan [TS_13]
- | | | alias:x
- | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | | |<-Select Operator [SEL_12]
+ | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col1 (type: string)
+ | | Select Operator [SEL_1]
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_145]
+ | | Filter Operator [FIL_148]
| | predicate:key is not null (type: boolean)
| | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_11]
+ | | TableScan [TS_0]
| | alias:y
| | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- | |<-Reducer 3 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_22]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
+ | |<-Reducer 9 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_24]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
| sort order:+
- | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Select Operator [SEL_10]
+ | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | Map Join Operator [MAPJOIN_163]
+ | | condition map:[{"":"Inner Join 0 to 1"}]
+ | | keys:{"Map 11":"_col1 (type: string)","Reducer 9":"_col0 (type: string)"}
+ | | outputColumnNames:["_col1"]
+ | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+ | |<-Map 11 [BROADCAST_EDGE]
+ | | Reduce Output Operator [RS_18]
+ | | key expressions:_col1 (type: string)
+ | | Map-reduce partition columns:_col1 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | value expressions:_col0 (type: string)
+ | | Select Operator [SEL_14]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | Filter Operator [FIL_151]
+ | | predicate:(value is not null and key is not null) (type: boolean)
+ | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | | TableScan [TS_13]
+ | | alias:x
+ | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ | |<-Select Operator [SEL_12]
| outputColumnNames:["_col0"]
| Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | Group By Operator [GBY_9]
+ | Group By Operator [GBY_11]
| | keys:KEY._col0 (type: string), KEY._col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
| | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- | |<-Union 2 [SIMPLE_EDGE]
- | |<-Map 1 [CONTAINS]
- | | Reduce Output Operator [RS_8]
+ | |<-Union 8 [SIMPLE_EDGE]
+ | |<-Map 10 [CONTAINS]
+ | | Reduce Output Operator [RS_10]
| | key expressions:_col0 (type: string), _col1 (type: string)
| | Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| | sort order:++
- | | Group By Operator [GBY_7]
+ | | Group By Operator [GBY_9]
| | keys:_col0 (type: string), _col1 (type: string)
| | outputColumnNames:["_col0","_col1"]
- | | Select Operator [SEL_1]
+ | | Select Operator [SEL_5]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_143]
+ | | Filter Operator [FIL_150]
| | predicate:value is not null (type: boolean)
- | | TableScan [TS_0]
- | | alias:x
- | |<-Map 9 [CONTAINS]
- | Reduce Output Operator [RS_8]
+ | | TableScan [TS_4]
+ | | alias:y
+ | |<-Map 7 [CONTAINS]
+ | Reduce Output Operator [RS_10]
| key expressions:_col0 (type: string), _col1 (type: string)
| Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
| sort order:++
- | Group By Operator [GBY_7]
+ | Group By Operator [GBY_9]
| keys:_col0 (type: string), _col1 (type: string)
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_3]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_144]
+ | Filter Operator [FIL_149]
| predicate:value is not null (type: boolean)
| TableScan [TS_2]
- | alias:y
+ | alias:x
|<-Reducer 29 [CONTAINS]
- Reduce Output Operator [RS_118]
+ Reduce Output Operator [RS_117]
key expressions:_col0 (type: string), _col1 (type: string)
Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
sort order:++
- Group By Operator [GBY_117]
+ Group By Operator [GBY_116]
keys:_col0 (type: string), _col1 (type: string)
outputColumnNames:["_col0","_col1"]
- Select Operator [SEL_113]
+ Select Operator [SEL_112]
outputColumnNames:["_col0","_col1"]
- Map Join Operator [MAPJOIN_163]
+ Map Join Operator [MAPJOIN_168]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"Map 34":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"}
+ | keys:{"Map 34":"_col0 (type: string)","Reducer 29":"_col1 (type: string)"}
| outputColumnNames:["_col1","_col4"]
|<-Map 34 [BROADCAST_EDGE]
- | Reduce Output Operator [RS_111]
- | key expressions:_col1 (type: string)
- | Map-reduce partition columns:_col1 (type: string)
+ | Reduce Output Operator [RS_110]
+ | key expressions:_col0 (type: string)
+ | Map-reduce partition columns:_col0 (type: string)
| sort order:+
- | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- | value expressions:_col0 (type: string), _col3 (type: string)
- | Map Join Operator [MAPJOIN_160]
- | | condition map:[{"":"Inner Join 0 to 1"}]
- | | keys:{"Map 34":"_col0 (type: string)","Map 33":"_col0 (type: string)"}
- | | outputColumnNames:["_col0","_col1","_col3"]
- | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- | |<-Map 33 [BROADCAST_EDGE]
- | | Reduce Output Operator [RS_103]
- | | key expressions:_col0 (type: string)
- | | Map-reduce partition columns:_col0 (type: string)
- | | sort order:+
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | value expressions:_col1 (type: string)
- | | Select Operator [SEL_99]
- | | outputColumnNames:["_col0","_col1"]
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | Filter Operator [FIL_156]
- | | predicate:(key is not null and value is not null) (type: boolean)
- | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
- | | TableScan [TS_98]
- | | alias:x
- | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- | |<-Select Operator [SEL_101]
+ | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col1 (type: string)
+ | Select Operator [SEL_101]
| outputColumnNames:["_col0","_col1"]
| Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- | Filter Operator [FIL_157]
+ | Filter Operator [FIL_162]
| predicate:key is not null (type: boolean)
| Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
| TableScan [TS_100]
| alias:x
| Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ |<-Map Join Operator [MAPJOIN_167]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{"Map 33":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"}
+ | outputColumnNames:["_col1"]
+ |<-Map 33 [BROADCAST_EDGE]
+ | Reduce Output Operator [RS_105]
+ | key expressions:_col1 (type: string)
+ | Map-reduce partition columns:_col1 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | value expressions:_col0 (type: string)
+ | Select Operator [SEL_99]
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | Filter Operator [FIL_161]
+ | predicate:(value is not null and key is not null) (type: boolean)
+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_98]
+ | alias:x
+ | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
|<-Select Operator [SEL_97]
outputColumnNames:["_col0"]
Group By Operator [GBY_96]
@@ -3449,7 +3491,7 @@
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_90]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_155]
+ | Filter Operator [FIL_160]
| predicate:value is not null (type: boolean)
| TableScan [TS_89]
| alias:y
@@ -3487,7 +3529,7 @@
| | outputColumnNames:["_col0","_col1"]
| | Select Operator [SEL_72]
| | outputColumnNames:["_col0","_col1"]
- | | Filter Operator [FIL_153]
+ | | Filter Operator [FIL_158]
| | predicate:value is not null (type: boolean)
| | TableScan [TS_71]
| | alias:y
@@ -3501,7 +3543,7 @@
| outputColumnNames:["_col0","_col1"]
| Select Operator [SEL_70]
| outputColumnNames:["_col0","_col1"]
- | Filter Operator [FIL_152]
+ | Filter Operator [FIL_157]
| predicate:value is not null (type: boolean)
| TableScan [TS_69]
| alias:x
@@ -3515,7 +3557,7 @@
outputColumnNames:["_col0","_col1"]
Select Operator [SEL_81]
outputColumnNames:["_col0","_col1"]
- Filter Operator [FIL_154]
+ Filter Operator [FIL_159]
predicate:value is not null (type: boolean)
TableScan [TS_80]
alias:y
Index: ql/src/test/results/clientpositive/union_remove_6_subq.q.out
===================================================================
--- ql/src/test/results/clientpositive/union_remove_6_subq.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/union_remove_6_subq.q.out (working copy)
@@ -559,14 +559,14 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: avg_window_0
arguments: _col1
name: avg
window function: GenericUDAFAverageEvaluatorDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _wcol0 (type: double)
+ expressions: _col0 (type: string), avg_window_0 (type: double)
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/windowing_streaming.q.out
===================================================================
--- ql/src/test/results/clientpositive/windowing_streaming.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/windowing_streaming.q.out (working copy)
@@ -89,7 +89,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
@@ -97,7 +97,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _wcol0 (type: int)
+ expressions: _col2 (type: string), rank_window_0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -160,7 +160,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
@@ -168,10 +168,10 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 < 4) (type: boolean)
+ predicate: (rank_window_0 < 4) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _wcol0 (type: int)
+ expressions: _col2 (type: string), rank_window_0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -329,7 +329,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -337,10 +337,10 @@
isPivotResult: true
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 < 5) (type: boolean)
+ predicate: (rank_window_0 < 5) (type: boolean)
Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: tinyint), _col5 (type: double), _wcol0 (type: int)
+ expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/groupby_resolution.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_resolution.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/groupby_resolution.q.out (working copy)
@@ -690,7 +690,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
@@ -698,7 +698,7 @@
isPivotResult: true
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _wcol0 (type: int)
+ expressions: _col0 (type: string), _col1 (type: bigint), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/join32.q.out
===================================================================
--- ql/src/test/results/clientpositive/join32.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/join32.q.out (working copy)
@@ -109,25 +109,71 @@
Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:y
+ $hdt$_0:z
Fetch Operator
limit: -1
+ Partition Description:
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
$hdt$_1:$hdt$_2:x
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:y
+ $hdt$_0:z
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
@@ -141,7 +187,7 @@
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value is not null and key is not null) (type: boolean)
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -150,31 +196,31 @@
HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
Position of Big Table: 0
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
@@ -183,11 +229,11 @@
keys:
0 _col0 (type: string)
1 _col3 (type: string)
- outputColumnNames: _col1, _col2, _col5
+ outputColumnNames: _col0, _col4, _col5
Position of Big Table: 1
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -356,7 +402,7 @@
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z]
+ /src [$hdt$_1:$hdt$_1:y]
Stage: Stage-0
Move Operator
@@ -405,8 +451,8 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
Index: ql/src/test/results/clientpositive/join_alt_syntax.q.out
===================================================================
--- ql/src/test/results/clientpositive/join_alt_syntax.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/join_alt_syntax.q.out (working copy)
@@ -359,13 +359,13 @@
and p1.p_partkey = p2.p_partkey
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-2 depends on stages: Stage-3
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -379,34 +379,36 @@
outputColumnNames: _col0, _col1
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (p_partkey is not null and p_name is not null) (type: boolean)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ predicate: p_partkey is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int), _col1 (type: string)
- 1 _col0 (type: int), _col1 (type: string)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -418,89 +420,89 @@
Map Reduce
Map Operator Tree:
TableScan
- alias: p1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_name is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_name (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col3 (type: string)
- sort order: +
- Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ 0 _col0 (type: int), _col1 (type: string)
+ 1 _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col1, _col3, _col5, _col6
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ predicate: (p_name is not null and p_partkey is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: p1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: int)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col1 (type: int)
- outputColumnNames: _col1, _col2, _col4, _col6
- Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -519,54 +521,56 @@
and p1.p_partkey = p2.p_partkey
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-2 depends on stages: Stage-3
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (p_name is not null and p_partkey is not null) (type: boolean)
+ predicate: (p_partkey is not null and p_name is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (p_name is not null and p_partkey is not null) (type: boolean)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ predicate: p_partkey is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
- Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string), _col0 (type: int)
- 1 _col1 (type: string), _col0 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -578,89 +582,89 @@
Map Reduce
Map Operator Tree:
TableScan
- alias: p1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_name is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_name (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col3 (type: string)
- sort order: +
- Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: string), _col0 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col1, _col3, _col5, _col6
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ predicate: (p_name is not null and p_partkey is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: p1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: int)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col1 (type: int)
- outputColumnNames: _col1, _col2, _col4, _col6
- Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/correlationoptimizer12.q.out
===================================================================
--- ql/src/test/results/clientpositive/correlationoptimizer12.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/correlationoptimizer12.q.out (working copy)
@@ -52,7 +52,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
@@ -62,7 +62,7 @@
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _wcol0 (type: bigint)
+ expressions: _col0 (type: string), count_window_0 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -137,7 +137,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
@@ -147,7 +147,7 @@
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _wcol0 (type: bigint)
+ expressions: _col0 (type: string), count_window_0 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/quotedid_basic.q.out
===================================================================
--- ql/src/test/results/clientpositive/quotedid_basic.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/quotedid_basic.q.out (working copy)
@@ -211,7 +211,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
@@ -219,7 +219,7 @@
isPivotResult: true
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
@@ -315,7 +315,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
@@ -323,7 +323,7 @@
isPivotResult: true
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/vectorized_ptf.q.out
===================================================================
--- ql/src/test/results/clientpositive/vectorized_ptf.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/vectorized_ptf.q.out (working copy)
@@ -387,28 +387,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -844,7 +844,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -852,7 +852,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1408,28 +1408,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1782,21 +1782,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -1804,7 +1804,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2238,21 +2238,21 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: lag_window_2
arguments: _col2, 1, _col2
name: lag
window function: GenericUDAFLagEvaluator
@@ -2260,7 +2260,7 @@
isPivotResult: true
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3303,7 +3303,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -3311,7 +3311,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3667,28 +3667,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4032,28 +4032,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4514,28 +4514,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -4909,20 +4909,20 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: count_window_0
arguments: _col5
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol1
+ alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), sum_window_1 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -5503,33 +5503,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -5537,7 +5537,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -6172,14 +6172,14 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(2)~
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double)
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -6706,28 +6706,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -6841,15 +6841,15 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: sum_window_0
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(5)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _wcol0 (type: bigint)
- outputColumnNames: _col1, _col2, _col5, _wcol0
+ expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col5, sum_window_0
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -6860,7 +6860,7 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
- columns _col1,_col2,_col5,_wcol0
+ columns _col1,_col2,_col5,sum_window_0
columns.types string,string,int,bigint
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -6880,7 +6880,7 @@
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _wcol0 (type: bigint), _col5 (type: int)
+ value expressions: sum_window_0 (type: bigint), _col5 (type: int)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -6891,7 +6891,7 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
- columns _col1,_col2,_col5,_wcol0
+ columns _col1,_col2,_col5,sum_window_0
columns.types string,string,int,bigint
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -6900,7 +6900,7 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
- columns _col1,_col2,_col5,_wcol0
+ columns _col1,_col2,_col5,sum_window_0
columns.types string,string,int,bigint
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -6927,35 +6927,35 @@
raw input shape:
window functions:
window function definition
- alias: _wcol1
+ alias: rank_window_1
arguments: _col3, _col2
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: dense_rank_window_2
arguments: _col3, _col2
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol3
+ alias: cume_dist_window_3
arguments: _col3, _col2
name: cume_dist
window function: GenericUDAFCumeDistEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol4
+ alias: first_value_window_4
arguments: _col6, true
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int)
+ expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -7553,28 +7553,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -8138,28 +8138,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -8637,28 +8637,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -9234,28 +9234,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -9793,28 +9793,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -10311,28 +10311,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/subquery_notin.q.out
===================================================================
--- ql/src/test/results/clientpositive/subquery_notin.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/subquery_notin.q.out (working copy)
@@ -346,7 +346,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -354,7 +354,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean)
+ predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
@@ -501,7 +501,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -509,7 +509,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col2 (type: string)
@@ -628,7 +628,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -636,7 +636,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col5 (type: int)
@@ -775,7 +775,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -783,7 +783,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col5 (type: int)
@@ -950,7 +950,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -958,7 +958,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
@@ -1141,7 +1141,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -1149,7 +1149,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
Index: ql/src/test/results/clientpositive/cbo_join.q.out
===================================================================
--- ql/src/test/results/clientpositive/cbo_join.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/cbo_join.q.out (working copy)
@@ -1,4 +1,5 @@
-PREHOOK: query: -- 4. Test Select + Join + TS
+PREHOOK: query: -- SORT_QUERY_RESULTS
+-- 4. Test Select + Join + TS
select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -6,7 +7,8 @@
PREHOOK: Input: default@cbo_t2
PREHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-POSTHOOK: query: -- 4. Test Select + Join + TS
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+-- 4. Test Select + Join + TS
select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@cbo_t1
@@ -122,46 +124,6 @@
POSTHOOK: Input: default@cbo_t1@dt=2014
POSTHOOK: Input: default@cbo_t3
#### A masked pattern was here ####
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
-NULL
1
1
1
@@ -522,6 +484,46 @@
1
1
1
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -632,8 +634,6 @@
POSTHOOK: Input: default@cbo_t2
POSTHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-NULL NULL
-NULL NULL
1 1
1 1
1 1
@@ -730,6 +730,8 @@
1 1
1 1
1 1
+NULL NULL
+NULL NULL
PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -744,8 +746,6 @@
POSTHOOK: Input: default@cbo_t2
POSTHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-NULL NULL
-NULL NULL
1 1
1 1
1 1
@@ -847,6 +847,8 @@
NULL 2
NULL 2
NULL 2
+NULL NULL
+NULL NULL
PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -861,10 +863,6 @@
POSTHOOK: Input: default@cbo_t2
POSTHOOK: Input: default@cbo_t2@dt=2014
#### A masked pattern was here ####
-NULL NULL
-NULL NULL
-NULL NULL
-NULL NULL
1 1
1 1
1 1
@@ -966,6 +964,10 @@
NULL 2
NULL 2
NULL 2
+NULL NULL
+NULL NULL
+NULL NULL
+NULL NULL
PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -5334,8 +5336,6 @@
POSTHOOK: Input: default@cbo_t2@dt=2014
POSTHOOK: Input: default@cbo_t3
#### A masked pattern was here ####
-NULL NULL NULL NULL
-NULL NULL NULL NULL
1 1 1 1
1 1 1 1
1 1 1 1
@@ -5870,6 +5870,8 @@
NULL NULL NULL NULL
NULL NULL NULL NULL
NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL
PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key
PREHOOK: type: QUERY
PREHOOK: Input: default@cbo_t1
@@ -6430,8 +6432,6 @@
POSTHOOK: Input: default@cbo_t2@dt=2014
POSTHOOK: Input: default@cbo_t3
#### A masked pattern was here ####
-NULL NULL NULL NULL
-NULL NULL NULL NULL
1 1 1 1
1 1 1 1
1 1 1 1
@@ -6966,6 +6966,8 @@
NULL NULL NULL NULL
NULL NULL NULL NULL
NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL
PREHOOK: query: -- 5. Test Select + Join + FIL + TS
select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0)
PREHOOK: type: QUERY
Index: ql/src/test/results/clientpositive/ptf_streaming.q.out
===================================================================
--- ql/src/test/results/clientpositive/ptf_streaming.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/ptf_streaming.q.out (working copy)
@@ -97,28 +97,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -313,7 +313,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -321,7 +321,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -637,7 +637,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -645,7 +645,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -814,28 +814,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1061,28 +1061,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1310,28 +1310,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1559,28 +1559,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1785,33 +1785,33 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol3
+ alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: PRECEDING(MAX)~
window function definition
- alias: _wcol4
+ alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
@@ -1819,7 +1819,7 @@
isPivotResult: true
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2074,28 +2074,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2364,28 +2364,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2628,28 +2628,28 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol1
+ alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
- alias: _wcol2
+ alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: PRECEDING(MAX)~
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint)
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/join32_lessSize.q.out
===================================================================
--- ql/src/test/results/clientpositive/join32_lessSize.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/join32_lessSize.q.out (working copy)
@@ -130,7 +130,7 @@
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value is not null and key is not null) (type: boolean)
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -139,31 +139,31 @@
HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
Position of Big Table: 0
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -175,8 +175,8 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
- columns _col0,_col3
- columns.types string,string
+ columns _col1,_col2,_col3
+ columns.types string,string,string
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -190,6 +190,50 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+#### A masked pattern was here ####
+ Partition
base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -232,7 +276,16 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src1
name: default.src1
-#### A masked pattern was here ####
+ Truncated Path -> Alias:
+ /src [$hdt$_1:$hdt$_1:y]
+
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:z
+ Fetch Operator
+ limit: -1
+ Partition Description:
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -278,28 +331,19 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
- Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z]
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:y
- Fetch Operator
- limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:y
+ $hdt$_0:z
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
@@ -318,11 +362,11 @@
keys:
0 _col0 (type: string)
1 _col3 (type: string)
- outputColumnNames: _col1, _col2, _col5
+ outputColumnNames: _col0, _col4, _col5
Position of Big Table: 1
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -362,8 +406,8 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
- columns _col0,_col3
- columns.types string,string
+ columns _col1,_col2,_col3
+ columns.types string,string,string
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -371,16 +415,19 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
- columns _col0,_col3
- columns.types string,string
+ columns _col1,_col2,_col3
+ columns.types string,string,string
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -388,11 +435,13 @@
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -402,24 +451,21 @@
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
+ name: default.srcpart
+ name: default.srcpart
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -470,8 +516,8 @@
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
@@ -657,27 +703,28 @@
Stage-8 depends on stages: Stage-11
Stage-10 depends on stages: Stage-8
Stage-7 depends on stages: Stage-10
- Stage-9 depends on stages: Stage-7
- Stage-6 depends on stages: Stage-9
- Stage-0 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-7
Stage-2 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-11
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:$hdt$_2:$hdt$_2:x
+ $hdt$_1:$hdt$_2:x
Fetch Operator
limit: -1
+ $hdt$_1:$hdt$_3:x
+ Fetch Operator
+ limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_1:$hdt$_2:$hdt$_2:x
+ $hdt$_1:$hdt$_2:x
TableScan
alias: x
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key is not null and value is not null) (type: boolean)
+ predicate: (value is not null and key is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -686,12 +733,9 @@
HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 1
-
- Stage: Stage-8
- Map Reduce
- Map Operator Tree:
+ 1 _col1 (type: string)
+ Position of Big Table: 0
+ $hdt$_1:$hdt$_3:x
TableScan
alias: x
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -704,92 +748,15 @@
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
+ HashTable Sink Operator
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Position of Big Table: 1
- Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col3
- columns.types string,string,string
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: src1
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src1
- numFiles 1
- numRows 25
- rawDataSize 191
- serialization.ddl struct src1 { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 216
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Position of Big Table: 0
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src1
- numFiles 1
- numRows 25
- rawDataSize 191
- serialization.ddl struct src1 { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 216
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src1
- name: default.src1
- Truncated Path -> Alias:
- /src1 [$hdt$_1:$hdt$_2:$hdt$_3:x]
-
- Stage: Stage-10
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_1:$hdt$_1:w
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_1:$hdt$_1:w
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: w
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -802,26 +769,24 @@
expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col1 (type: string)
- Position of Big Table: 1
-
- Stage: Stage-7
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
+ outputColumnNames: _col1
+ Position of Big Table: 0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col1, _col4
- Position of Big Table: 1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Position of Big Table: 0
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -846,26 +811,6 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col3
- columns.types string,string,string
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col3
- columns.types string,string,string
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-#### A masked pattern was here ####
- Partition
base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -908,10 +853,54 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src
name: default.src
- Truncated Path -> Alias:
#### A masked pattern was here ####
+ Partition
+ base file name: src1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ numFiles 1
+ numRows 25
+ rawDataSize 191
+ serialization.ddl struct src1 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-9
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ numFiles 1
+ numRows 25
+ rawDataSize 191
+ serialization.ddl struct src1 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src1
+ name: default.src1
+ Truncated Path -> Alias:
+ /src [$hdt$_1:$hdt$_1:w]
+
+ Stage: Stage-10
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:w
@@ -937,7 +926,7 @@
1 _col1 (type: string)
Position of Big Table: 1
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce
Map Operator Tree:
TableScan
@@ -950,17 +939,17 @@
1 _col1 (type: string)
outputColumnNames: _col1, _col3, _col6
Position of Big Table: 1
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -993,7 +982,7 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: -mr-10001
+ base file name: -mr-10002
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
Index: ql/src/test/results/clientpositive/groupby_grouping_window.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_grouping_window.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out (working copy)
@@ -106,7 +106,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col3
name: rank
window function: GenericUDAFRankEvaluator
@@ -114,7 +114,7 @@
isPivotResult: true
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _wcol0 (type: int)
+ expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
File Output Operator
Index: ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
===================================================================
--- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (working copy)
@@ -231,7 +231,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -239,7 +239,7 @@
isPivotResult: true
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Filter Operator
- predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean)
+ predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
@@ -399,7 +399,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -407,7 +407,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean)
+ predicate: ((rank_window_0 <= 2) and _col2 is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col5 (type: int)
@@ -842,7 +842,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -850,7 +850,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean)
+ predicate: ((rank_window_0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
@@ -997,7 +997,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col5
name: rank
window function: GenericUDAFRankEvaluator
@@ -1005,7 +1005,7 @@
isPivotResult: true
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_wcol0 <= 2) (type: boolean)
+ predicate: (rank_window_0 <= 2) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col2 (type: string)
Index: ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
===================================================================
--- ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out (working copy)
@@ -808,32 +808,32 @@
alias: s
Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
TableScan
alias: s
Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: s_store_sk is not null (type: boolean)
- Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Join Operator
condition map:
@@ -843,10 +843,10 @@
0 _col0 (type: int)
1 _col0 (type: int)
2 _col0 (type: int)
- outputColumnNames: _col1
+ outputColumnNames: _col2
Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col1 (type: int)
+ expressions: _col2 (type: int)
outputColumnNames: _col0
Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
Index: ql/src/test/results/clientpositive/ctas_colname.q.out
===================================================================
--- ql/src/test/results/clientpositive/ctas_colname.q.out (revision 1673283)
+++ ql/src/test/results/clientpositive/ctas_colname.q.out (working copy)
@@ -198,7 +198,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
@@ -206,7 +206,7 @@
isPivotResult: true
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: int)
+ expressions: _col0 (type: string), _col1 (type: string), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -360,7 +360,7 @@
raw input shape:
window functions:
window function definition
- alias: _wcol0
+ alias: lead_window_0
arguments: _col0, 1
name: lead
window function: GenericUDAFLeadEvaluator
@@ -368,7 +368,7 @@
isPivotResult: true
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: string)
+ expressions: _col0 (type: string), _col1 (type: string), lead_window_0 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Limit
Index: ql/src/test/queries/clientpositive/cbo_join.q
===================================================================
--- ql/src/test/queries/clientpositive/cbo_join.q (revision 1673283)
+++ ql/src/test/queries/clientpositive/cbo_join.q (working copy)
@@ -4,6 +4,7 @@
set hive.stats.fetch.column.stats=true;
set hive.auto.convert.join=false;
+-- SORT_QUERY_RESULTS
-- 4. Test Select + Join + TS
select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key;
select cbo_t1.key from cbo_t1 join cbo_t3;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java (working copy)
@@ -20,21 +20,64 @@
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys;
import com.google.common.collect.ImmutableList;
public class HiveDefaultRelMetadataProvider {
- private HiveDefaultRelMetadataProvider() {
+
+ private final HiveConf hiveConf;
+
+
+ public HiveDefaultRelMetadataProvider(HiveConf hiveConf) {
+ this.hiveConf = hiveConf;
}
- public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList
- .of(HiveRelMdDistinctRowCount.SOURCE,
+ public RelMetadataProvider getMetadataProvider() {
+
+ // Create cost metadata provider
+ final HiveCostModel cm;
+ if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
+ && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.EXTENDED_COST_MODEL)) {
+ final Double maxMemory = (double) HiveConf.getLongVar(
+ this.hiveConf,
+ HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
+ cm = new HiveOnTezCostModel(maxMemory);
+ } else {
+ cm = new HiveDefaultCostModel();
+ }
+
+ // Get max split size for HiveRelMdParallelism
+ final Double maxSplitSize = (double) HiveConf.getLongVar(
+ this.hiveConf,
+ HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
+
+ // Return MD provider
+ return ChainedRelMetadataProvider.of(ImmutableList
+ .of(new HiveRelMdCost(cm).getMetadataProvider(),
+ HiveRelMdDistinctRowCount.SOURCE,
HiveRelMdSelectivity.SOURCE,
HiveRelMdRowCount.SOURCE,
HiveRelMdUniqueKeys.SOURCE,
+ HiveRelMdSize.SOURCE,
+ HiveRelMdMemory.SOURCE,
+ new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
+ HiveRelMdDistribution.SOURCE,
+ HiveRelMdCollation.SOURCE,
new DefaultRelMetadataProvider()));
}
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java (working copy)
@@ -24,9 +24,9 @@
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.RelFactories.FilterFactory;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
public class HiveFilter extends Filter implements HiveRelNode {
@@ -48,7 +48,7 @@
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return HiveCost.FACTORY.makeZeroCost();
+ return RelMetadataQuery.getNonCumulativeCost(this);
}
/**
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java (working copy)
@@ -31,7 +31,6 @@
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
import com.google.common.collect.ImmutableList;
@@ -39,6 +38,8 @@
public static final HiveAggRelFactory HIVE_AGGR_REL_FACTORY = new HiveAggRelFactory();
+
+
public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
boolean indicator, ImmutableBitSet groupSet, List<ImmutableBitSet> groupSets,
      List<AggregateCall> aggCalls) throws InvalidRelException {
@@ -66,7 +67,7 @@
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return HiveCost.FACTORY.makeZeroCost();
+ return RelMetadataQuery.getNonCumulativeCost(this);
}
@Override
@@ -75,6 +76,11 @@
.makeLiteral(true));
}
+ public boolean isBucketedInput() {
+ return RelMetadataQuery.distribution(this.getInput()).getKeys().
+ containsAll(groupSet.asList());
+ }
+
private static class HiveAggRelFactory implements AggregateFactory {
@Override
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (working copy)
@@ -29,6 +29,7 @@
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
@@ -42,7 +43,6 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
-
import com.google.common.collect.ImmutableList;
public class HiveProject extends Project implements HiveRelNode {
@@ -172,7 +172,7 @@
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return HiveCost.FACTORY.makeZeroCost();
+ return RelMetadataQuery.getNonCumulativeCost(this);
}
@Override
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortExchange.java (revision 0)
@@ -0,0 +1,49 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.plan.Convention;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelDistribution;
+import org.apache.calcite.rel.RelDistributionTraitDef;
+import org.apache.calcite.rel.RelInput;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.SortExchange;
+
+public class HiveSortExchange extends SortExchange {
+
+ private HiveSortExchange(RelOptCluster cluster, RelTraitSet traitSet,
+ RelNode input, RelDistribution distribution, RelCollation collation) {
+ super(cluster, traitSet, input, distribution, collation);
+ }
+
+ public HiveSortExchange(RelInput input) {
+ super(input);
+ }
+
+ /**
+ * Creates a HiveSortExchange.
+ *
+ * @param input Input relational expression
+ * @param distribution Distribution specification
+ * @param collation Collation specification
+ */
+ public static HiveSortExchange create(RelNode input,
+ RelDistribution distribution, RelCollation collation) {
+ RelOptCluster cluster = input.getCluster();
+ distribution = RelDistributionTraitDef.INSTANCE.canonize(distribution);
+ RelTraitSet traitSet =
+ input.getTraitSet().replace(Convention.NONE).replace(distribution);
+ collation = RelCollationTraitDef.INSTANCE.canonize(collation);
+ return new HiveSortExchange(cluster, traitSet, input, distribution, collation);
+ }
+
+ @Override
+ public SortExchange copy(RelTraitSet traitSet, RelNode newInput, RelDistribution newDistribution,
+ RelCollation newCollation) {
+ return new HiveSortExchange(getCluster(), traitSet, newInput,
+ newDistribution, newCollation);
+ }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (working copy)
@@ -17,21 +17,34 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+import java.util.ArrayList;
+import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+
/**
* Relational expression representing a scan of a HiveDB collection.
*
@@ -42,6 +55,14 @@
*/
public class HiveTableScan extends TableScan implements HiveRelNode {
+ private final RelDataType hiveTableScanRowType;
+  private final ImmutableList<Integer> neededColIndxsFrmReloptHT;
+ private final String tblAlias;
+
+ public String getTableAlias() {
+ return tblAlias;
+ }
+
/**
* Creates a HiveTableScan.
*
@@ -54,10 +75,17 @@
* @param table
* HiveDB table
*/
- public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table,
- RelDataType rowtype) {
+ public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, String alias) {
+ this(cluster, traitSet, table, alias, table.getRowType());
+ }
+
+ private HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, String alias,
+ RelDataType newRowtype) {
super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table);
assert getConvention() == HiveRelNode.CONVENTION;
+ this.tblAlias = alias;
+ this.hiveTableScanRowType = newRowtype;
+ this.neededColIndxsFrmReloptHT = buildNeededColIndxsFrmReloptHT(table.getRowType(), newRowtype);
}
@Override
@@ -66,9 +94,21 @@
return this;
}
+ /**
+ * Copy TableScan operator with a new Row Schema. The new Row Schema can only
+ * be a subset of this TS schema.
+ *
+ * @param newRowtype
+ * @return
+ */
+ public HiveTableScan copy(RelDataType newRowtype) {
+ return new HiveTableScan(getCluster(), getTraitSet(), ((RelOptHiveTable) table), this.tblAlias,
+ newRowtype);
+ }
+
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return HiveCost.FACTORY.makeZeroCost();
+ return RelMetadataQuery.getNonCumulativeCost(this);
}
@Override
@@ -89,4 +129,62 @@
public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
return ((RelOptHiveTable) table).getColStat(projIndxLst);
}
-}
\ No newline at end of file
+
+ @Override
+  public RelNode project(ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields,
+ RelFactories.ProjectFactory projectFactory) {
+
+ // 1. If the schema is the same then bail out
+ final int fieldCount = getRowType().getFieldCount();
+ if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount)) && extraFields.isEmpty()) {
+ return this;
+ }
+
+ // 2. Make sure there is no dynamic addition of virtual cols
+ if (extraFields != null && !extraFields.isEmpty()) {
+ throw new RuntimeException("Hive TS does not support adding virtual columns dynamically");
+ }
+
+ // 3. Create new TS schema that is a subset of original
+    final List<RelDataTypeField> fields = getRowType().getFieldList();
+    List<RelDataType> fieldTypes = new LinkedList<RelDataType>();
+    List<String> fieldNames = new LinkedList<String>();
+    List<RexNode> exprList = new ArrayList<RexNode>();
+ RexBuilder rexBuilder = getCluster().getRexBuilder();
+ for (int i : fieldsUsed) {
+ RelDataTypeField field = fields.get(i);
+ fieldTypes.add(field.getType());
+ fieldNames.add(field.getName());
+ exprList.add(rexBuilder.makeInputRef(this, i));
+ }
+
+ // 4. Build new TS
+ HiveTableScan newHT = copy(getCluster().getTypeFactory().createStructType(fieldTypes,
+ fieldNames));
+
+ // 5. Add Proj on top of TS
+    return projectFactory.createProject(newHT, exprList, new ArrayList<String>(fieldNames));
+ }
+
+  public List<Integer> getNeededColIndxsFrmReloptHT() {
+ return neededColIndxsFrmReloptHT;
+ }
+
+ public RelDataType getPrunedRowType() {
+ return hiveTableScanRowType;
+ }
+
+  private static ImmutableList<Integer> buildNeededColIndxsFrmReloptHT(RelDataType htRowtype,
+      RelDataType scanRowType) {
+    Builder<Integer> neededColIndxsFrmReloptHTBldr = new ImmutableList.Builder<Integer>();
+    Map<String, Integer> colNameToPosInReloptHT = HiveCalciteUtil.getRowColNameIndxMap(htRowtype
+        .getFieldList());
+    List<String> colNamesInScanRowType = scanRowType.getFieldNames();
+
+ for (int i = 0; i < colNamesInScanRowType.size(); i++) {
+ neededColIndxsFrmReloptHTBldr.add(colNameToPosInReloptHT.get(colNamesInScanRowType.get(i)));
+ }
+
+ return neededColIndxsFrmReloptHTBldr.build();
+ }
+}
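Illustrative sketch (not part of the patch): the new column-pruning path, assuming an existing four-column HiveTableScan named scan and some RelFactories.ProjectFactory named projectFactory.

    // Keep only columns 0 and 2; project() builds a narrower scan through
    // copy(newRowType) and wraps it in a Project created by the factory.
    ImmutableBitSet fieldsUsed = ImmutableBitSet.of(0, 2);
    RelNode pruned = scan.project(fieldsUsed,
        Collections.<RelDataTypeField>emptySet(), projectFactory);

On the narrower scan, getNeededColIndxsFrmReloptHT() maps each kept column back to its position in the underlying RelOptHiveTable, which the operator translation added later in this patch relies on.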
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveLimit.java (working copy)
@@ -25,9 +25,9 @@
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.SingleRel;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
public class HiveLimit extends SingleRel implements HiveRelNode {
private final RexNode offset;
@@ -52,6 +52,6 @@
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return HiveCost.FACTORY.makeZeroCost();
+ return RelMetadataQuery.getNonCumulativeCost(this);
}
}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java (working copy)
@@ -17,7 +17,9 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+import java.util.ArrayList;
import java.util.Collections;
+import java.util.List;
import java.util.Set;
import org.apache.calcite.plan.RelOptCluster;
@@ -25,7 +27,9 @@
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.InvalidRelException;
+import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.RelFactories.JoinFactory;
@@ -33,19 +37,15 @@
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.ImmutableIntList;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel.JoinAlgorithm;
//TODO: Should we convert MultiJoin to be a child of HiveJoin
public class HiveJoin extends Join implements HiveRelNode {
- // NOTE: COMMON_JOIN & SMB_JOIN are Sort Merge Join (in case of COMMON_JOIN
- // each parallel computation handles multiple splits where as in case of SMB
- // each parallel computation handles one bucket). MAP_JOIN and BUCKET_JOIN is
- // hash joins where MAP_JOIN keeps the whole data set of non streaming tables
- // in memory where as BUCKET_JOIN keeps only the b
- public enum JoinAlgorithm {
- NONE, COMMON_JOIN, MAP_JOIN, BUCKET_JOIN, SMB_JOIN
- }
public enum MapJoinStreamingRelation {
NONE, LEFT_RELATION, RIGHT_RELATION
@@ -54,17 +54,17 @@
public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl();
private final boolean leftSemiJoin;
- private final JoinAlgorithm joinAlgorithm;
- //This will be used once we do Join Algorithm selection
- @SuppressWarnings("unused")
- private final MapJoinStreamingRelation mapJoinStreamingSide = MapJoinStreamingRelation.NONE;
+ private JoinAlgorithm joinAlgorithm;
+ private RelOptCost joinCost;
+
public static HiveJoin getJoin(RelOptCluster cluster, RelNode left, RelNode right,
RexNode condition, JoinRelType joinType, boolean leftSemiJoin) {
try {
Set<String> variablesStopped = Collections.emptySet();
- return new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped,
- JoinAlgorithm.NONE, null, leftSemiJoin);
+ HiveJoin join = new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped,
+ JoinAlgorithm.NONE, leftSemiJoin);
+ return join;
} catch (InvalidRelException e) {
throw new RuntimeException(e);
}
@@ -72,8 +72,7 @@
protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right,
RexNode condition, JoinRelType joinType, Set<String> variablesStopped,
- JoinAlgorithm joinAlgo, MapJoinStreamingRelation streamingSideForMapJoin, boolean leftSemiJoin)
- throws InvalidRelException {
+ JoinAlgorithm joinAlgo, boolean leftSemiJoin) throws InvalidRelException {
super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType,
variablesStopped);
this.joinAlgorithm = joinAlgo;
@@ -90,7 +89,7 @@
try {
Set<String> variablesStopped = Collections.emptySet();
return new HiveJoin(getCluster(), traitSet, left, right, conditionExpr, joinType,
- variablesStopped, JoinAlgorithm.NONE, null, leftSemiJoin);
+ variablesStopped, joinAlgorithm, leftSemiJoin);
} catch (InvalidRelException e) {
// Semantic error not possible. Must be a bug. Convert to
// internal error.
@@ -102,6 +101,54 @@
return joinAlgorithm;
}
+ public void setJoinAlgorithm(JoinAlgorithm joinAlgorithm) {
+ this.joinAlgorithm = joinAlgorithm;
+ }
+
+ public MapJoinStreamingRelation getMapJoinStreamingSide() {
+ Double leftInputSize = RelMetadataQuery.memory(left);
+ Double rightInputSize = RelMetadataQuery.memory(right);
+ if (leftInputSize == null && rightInputSize == null) {
+ return MapJoinStreamingRelation.NONE;
+ } else if (leftInputSize != null &&
+ (rightInputSize == null ||
+ (leftInputSize < rightInputSize))) {
+ return MapJoinStreamingRelation.RIGHT_RELATION;
+ } else if (rightInputSize != null &&
+ (leftInputSize == null ||
+ (rightInputSize <= leftInputSize))) {
+ return MapJoinStreamingRelation.LEFT_RELATION;
+ }
+ return MapJoinStreamingRelation.NONE;
+ }
+
+ public void setJoinCost(RelOptCost joinCost) {
+ this.joinCost = joinCost;
+ }
+
+ public ImmutableBitSet getSortedInputs() {
+ ImmutableBitSet.Builder sortedInputsBuilder = new ImmutableBitSet.Builder();
+ JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.
+ constructJoinPredicateInfo(this);
+    List<ImmutableIntList> joinKeysInChildren = new ArrayList<ImmutableIntList>();
+ joinKeysInChildren.add(
+ ImmutableIntList.copyOf(
+ joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()));
+ joinKeysInChildren.add(
+ ImmutableIntList.copyOf(
+ joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()));
+
+ for (int i=0; i fieldCollations) {
+ super(fieldCollations);
+ }
+
+}
+
+
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java (revision 0)
@@ -0,0 +1,20 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.Context;
+import org.apache.hadoop.hive.conf.HiveConf;
+
+
+public class HiveConfigContext implements Context {
+ private HiveConf config;
+
+ public HiveConfigContext(HiveConf config) {
+ this.config = config;
+ }
+
+  public <T> T unwrap(Class<T> clazz) {
+ if (clazz.isInstance(config)) {
+ return clazz.cast(config);
+ }
+ return null;
+ }
+}
\ No newline at end of file
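Illustrative sketch (not part of the patch): the context simply hands the wrapped HiveConf back to callers that ask for it by type.

    HiveConfigContext ctx = new HiveConfigContext(new HiveConf());
    HiveConf conf = ctx.unwrap(HiveConf.class);   // returns the wrapped configuration
    Integer other = ctx.unwrap(Integer.class);    // returns null: not a HiveConf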
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostUtil.java (working copy)
@@ -18,26 +18,160 @@
package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import com.google.common.collect.ImmutableList;
+
// Use this once we have Join Algorithm selection
public class HiveCostUtil {
- private static final double cpuCostInNanoSec = 1.0;
- private static final double netCostInNanoSec = 150 * cpuCostInNanoSec;
- private static final double localFSWriteCostInNanoSec = 4 * netCostInNanoSec;
- private static final double localFSReadCostInNanoSec = 4 * netCostInNanoSec;
- private static final double hDFSWriteCostInNanoSec = 10 * localFSWriteCostInNanoSec;
- @SuppressWarnings("unused")
-//Use this once we have Join Algorithm selection
- private static final double hDFSReadCostInNanoSec = 1.5 * localFSReadCostInNanoSec;
+ private static final double CPU_COST = 1.0;
+ private static final double NET_COST = 150.0 * CPU_COST;
+ private static final double LOCAL_WRITE_COST = 4.0 * NET_COST;
+ private static final double LOCAL_READ_COST = 4.0 * NET_COST;
+ private static final double HDFS_WRITE_COST = 10.0 * LOCAL_WRITE_COST;
+ private static final double HDFS_READ_COST = 1.5 * LOCAL_READ_COST;
+
public static RelOptCost computCardinalityBasedCost(HiveRelNode hr) {
return new HiveCost(hr.getRows(), 0, 0);
}
public static HiveCost computeCost(HiveTableScan t) {
double cardinality = t.getRows();
- return new HiveCost(cardinality, 0, hDFSWriteCostInNanoSec * cardinality * 0);
+ return new HiveCost(cardinality, 0, HDFS_WRITE_COST * cardinality * 0);
}
+
+ public static double computeSortMergeCPUCost(
+      ImmutableList<Double> cardinalities,
+ ImmutableBitSet sorted) {
+ // Sort-merge join
+ double cpuCost = 0.0;
+ for (int i=0; i> relationInfos) {
+ // Sort-merge join
+ double ioCost = 0.0;
+    for (Pair<Double,Double> relationInfo : relationInfos) {
+ ioCost += computeSortIOCost(relationInfo);
+ }
+ return ioCost;
+ }
+
+  public static double computeSortIOCost(Pair<Double,Double> relationInfo) {
+ // Sort-merge join
+ double ioCost = 0.0;
+ double cardinality = relationInfo.left;
+ double averageTupleSize = relationInfo.right;
+ // Write cost
+ ioCost += cardinality * averageTupleSize * LOCAL_WRITE_COST;
+ // Read cost
+ ioCost += cardinality * averageTupleSize * LOCAL_READ_COST;
+ // Net transfer cost
+ ioCost += cardinality * averageTupleSize * NET_COST;
+ return ioCost;
+ }
+
+ public static double computeMapJoinCPUCost(
+      ImmutableList<Double> cardinalities,
+ ImmutableBitSet streaming) {
+ // Hash-join
+ double cpuCost = 0.0;
+ for (int i=0; i> relationInfos,
+ ImmutableBitSet streaming, int parallelism) {
+ // Hash-join
+ double ioCost = 0.0;
+ for (int i=0; i cardinalities,
+ ImmutableBitSet streaming) {
+ // Hash-join
+ double cpuCost = 0.0;
+ for (int i=0; i> relationInfos,
+ ImmutableBitSet streaming, int parallelism) {
+ // Hash-join
+ double ioCost = 0.0;
+ for (int i=0; i cardinalities) {
+ // Hash-join
+ double cpuCost = 0.0;
+ for (int i=0; i> relationInfos,
+ ImmutableBitSet streaming, int parallelism) {
+ // Hash-join
+ double ioCost = 0.0;
+ for (int i=0; i possibleAlgorithms = getExecutableJoinAlgorithms(join);
+
+ // Select algorithm with min cost
+ JoinAlgorithm joinAlgorithm = null;
+ RelOptCost minJoinCost = null;
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Join algorithm selection for:\n" + RelOptUtil.toString(join));
+ }
+ for (JoinAlgorithm possibleAlgorithm : possibleAlgorithms) {
+ RelOptCost joinCost = getJoinCost(join, possibleAlgorithm);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(possibleAlgorithm + " cost: " + joinCost);
+ }
+ if (minJoinCost == null || joinCost.isLt(minJoinCost) ) {
+ joinAlgorithm = possibleAlgorithm;
+ minJoinCost = joinCost;
+ }
+ }
+ join.setJoinAlgorithm(joinAlgorithm);
+ join.setJoinCost(minJoinCost);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(joinAlgorithm + " selected");
+ }
+
+ return minJoinCost;
+ }
+
+ /**
+ * Returns the possible algorithms for a given join operator.
+ *
+ * @param join the join operator
+ * @return a set containing all the possible join algorithms that can be
+ * executed for this join operator
+ */
+  abstract EnumSet<JoinAlgorithm> getExecutableJoinAlgorithms(HiveJoin join);
+
+ /**
+ * Returns the cost for a given algorithm and execution engine.
+ *
+ * @param join the join operator
+ * @param algorithm the join algorithm
+ * @return the cost for the given algorithm, or null if the algorithm is not
+ * defined for this execution engine
+ */
+ abstract RelOptCost getJoinCost(HiveJoin join, JoinAlgorithm algorithm);
+}
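Illustrative sketch (not part of the patch): the selection loop above only depends on the two abstract hooks, so a toy model shows the contract; the class below is invented for the example and would have to live in the same package as HiveCostModel, since the hooks are package-private.

    // A cost model that only ever offers COMMON_JOIN and charges pure cardinality.
    class ToyCostModel extends HiveCostModel {
      @Override
      public RelOptCost getDefaultCost() {
        return HiveCost.FACTORY.makeZeroCost();
      }
      @Override
      public RelOptCost getAggregateCost(HiveAggregate aggregate) {
        return HiveCost.FACTORY.makeZeroCost();
      }
      @Override
      EnumSet<JoinAlgorithm> getExecutableJoinAlgorithms(HiveJoin join) {
        return EnumSet.of(JoinAlgorithm.COMMON_JOIN);
      }
      @Override
      RelOptCost getJoinCost(HiveJoin join, JoinAlgorithm algorithm) {
        // Cardinality-only cost: sum of input row counts, no CPU or IO component.
        return HiveCost.FACTORY.makeCost(
            RelMetadataQuery.getRowCount(join.getLeft())
                + RelMetadataQuery.getRowCount(join.getRight()), 0, 0);
      }
    }

With such a model, getJoinCost(HiveJoin) always selects COMMON_JOIN, since it is the only algorithm reported as executable.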
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java (revision 0)
@@ -0,0 +1,367 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
+
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.rel.RelDistribution;
+import org.apache.calcite.rel.RelDistribution.Type;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.ImmutableIntList;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Cost model for Tez execution engine.
+ */
+public class HiveOnTezCostModel extends HiveCostModel {
+
+ private final Double maxMemory;
+
+
+ public HiveOnTezCostModel(Double maxMemory) {
+ this.maxMemory = maxMemory;
+ }
+
+ @Override
+ public RelOptCost getDefaultCost() {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public RelOptCost getAggregateCost(HiveAggregate aggregate) {
+ if (aggregate.isBucketedInput()) {
+ return HiveCost.FACTORY.makeZeroCost();
+ } else {
+ // 1. Sum of input cardinalities
+ final Double rCount = RelMetadataQuery.getRowCount(aggregate.getInput());
+ if (rCount == null) {
+ return null;
+ }
+ // 2. CPU cost = sorting cost
+ final double cpuCost = HiveCostUtil.computeSortCPUCost(rCount);
+ // 3. IO cost = cost of writing intermediary results to local FS +
+ // cost of reading from local FS for transferring to GBy +
+ // cost of transferring map outputs to GBy operator
+ final Double rAverageSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput());
+ if (rAverageSize == null) {
+ return null;
+ }
+      final double ioCost = HiveCostUtil.computeSortIOCost(new Pair<Double,Double>(rCount,rAverageSize));
+ // 4. Result
+ return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
+ }
+ }
+
+ @Override
+  protected EnumSet<JoinAlgorithm> getExecutableJoinAlgorithms(HiveJoin join) {
+    Set<JoinAlgorithm> possibleAlgorithms = new HashSet<JoinAlgorithm>();
+
+ // Check streaming side
+ RelNode smallInput;
+ if (join.getMapJoinStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) {
+ smallInput = join.getRight();
+ } else if (join.getMapJoinStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) {
+ smallInput = join.getLeft();
+ } else {
+ smallInput = null;
+ }
+
+ if (smallInput != null) {
+ // Requirements:
+ // - For SMB, sorted by their keys on both sides and bucketed.
+ // - For Bucket, bucketed by their keys on both sides. / Fitting in memory
+ // - For Map, no additional requirement. / Fitting in memory
+
+ // Get key columns
+ JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.
+ constructJoinPredicateInfo(join);
+      List<ImmutableIntList> joinKeysInChildren = new ArrayList<ImmutableIntList>();
+ joinKeysInChildren.add(
+ ImmutableIntList.copyOf(
+ joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()));
+ joinKeysInChildren.add(
+ ImmutableIntList.copyOf(
+ joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()));
+
+ // Obtain number of buckets
+ Integer buckets = RelMetadataQuery.splitCount(smallInput);
+ // Obtain map algorithms for which smallest input fits in memory
+ boolean bucketFitsMemory = false;
+ boolean inputFitsMemory = false;
+ if (buckets != null) {
+ bucketFitsMemory = isFittingIntoMemory(maxMemory, smallInput, buckets);
+ }
+ inputFitsMemory = bucketFitsMemory ?
+ isFittingIntoMemory(maxMemory, smallInput, 1) : false;
+ boolean orderedBucketed = true;
+ boolean bucketed = true;
+ for (int i=0; i maxSize) {
+ return false;
+ }
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ protected RelOptCost getJoinCost(HiveJoin join, JoinAlgorithm algorithm) {
+ RelOptCost algorithmCost;
+ switch (algorithm) {
+ case COMMON_JOIN:
+ algorithmCost = computeCostCommonJoin(join);
+ break;
+ case MAP_JOIN:
+ algorithmCost = computeCostMapJoin(join);
+ break;
+ case BUCKET_JOIN:
+ algorithmCost = computeCostBucketJoin(join);
+ break;
+ case SMB_JOIN:
+ algorithmCost = computeCostSMBJoin(join);
+ break;
+ default:
+ algorithmCost = null;
+ }
+ return algorithmCost;
+ }
+
+ private static RelOptCost computeCostCommonJoin(HiveJoin join) {
+ // 1. Sum of input cardinalities
+ final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
+ final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ if (leftRCount == null || rightRCount == null) {
+ return null;
+ }
+ final double rCount = leftRCount + rightRCount;
+ // 2. CPU cost = sorting cost (for each relation) +
+ // total merge cost
+    ImmutableList<Double> cardinalities = new ImmutableList.Builder<Double>().
+ add(leftRCount).
+ add(rightRCount).
+ build();
+ final double cpuCost = HiveCostUtil.computeSortMergeCPUCost(cardinalities, join.getSortedInputs());
+ // 3. IO cost = cost of writing intermediary results to local FS +
+ // cost of reading from local FS for transferring to join +
+ // cost of transferring map outputs to Join operator
+ final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
+ final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
+ if (leftRAverageSize == null || rightRAverageSize == null) {
+ return null;
+ }
+    ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>().
+            add(new Pair<Double,Double>(leftRCount,leftRAverageSize)).
+            add(new Pair<Double,Double>(rightRCount,rightRAverageSize)).
+ build();
+ final double ioCost = HiveCostUtil.computeSortMergeIOCost(relationInfos);
+ // 4. Result
+ return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
+ }
+
+ private static RelOptCost computeCostMapJoin(HiveJoin join) {
+ // 1. Sum of input cardinalities
+ final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
+ final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ if (leftRCount == null || rightRCount == null) {
+ return null;
+ }
+ final double rCount = leftRCount + rightRCount;
+ // 2. CPU cost = HashTable construction cost +
+ // join cost
+    ImmutableList<Double> cardinalities = new ImmutableList.Builder<Double>().
+ add(leftRCount).
+ add(rightRCount).
+ build();
+ ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
+ switch (join.getMapJoinStreamingSide()) {
+ case LEFT_RELATION:
+ streamingBuilder.set(0);
+ break;
+ case RIGHT_RELATION:
+ streamingBuilder.set(1);
+ break;
+ default:
+ return null;
+ }
+ ImmutableBitSet streaming = streamingBuilder.build();
+ final double cpuCost = HiveCostUtil.computeMapJoinCPUCost(cardinalities, streaming);
+ // 3. IO cost = cost of transferring small tables to join node *
+ // degree of parallelism
+ final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
+ final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
+ if (leftRAverageSize == null || rightRAverageSize == null) {
+ return null;
+ }
+    ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>().
+            add(new Pair<Double,Double>(leftRCount,leftRAverageSize)).
+            add(new Pair<Double,Double>(rightRCount,rightRAverageSize)).
+ build();
+ final int parallelism = RelMetadataQuery.splitCount(join) == null
+ ? 1 : RelMetadataQuery.splitCount(join);
+ final double ioCost = HiveCostUtil.computeMapJoinIOCost(relationInfos, streaming, parallelism);
+ // 4. Result
+ return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
+ }
+
+ private static RelOptCost computeCostBucketJoin(HiveJoin join) {
+ // 1. Sum of input cardinalities
+ final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
+ final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ if (leftRCount == null || rightRCount == null) {
+ return null;
+ }
+ final double rCount = leftRCount + rightRCount;
+ // 2. CPU cost = HashTable construction cost +
+ // join cost
+    ImmutableList<Double> cardinalities = new ImmutableList.Builder<Double>().
+ add(leftRCount).
+ add(rightRCount).
+ build();
+ ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
+ switch (join.getMapJoinStreamingSide()) {
+ case LEFT_RELATION:
+ streamingBuilder.set(0);
+ break;
+ case RIGHT_RELATION:
+ streamingBuilder.set(1);
+ break;
+ default:
+ return null;
+ }
+ ImmutableBitSet streaming = streamingBuilder.build();
+ final double cpuCost = HiveCostUtil.computeBucketMapJoinCPUCost(cardinalities, streaming);
+ // 3. IO cost = cost of transferring small tables to join node *
+ // degree of parallelism
+ final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
+ final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
+ if (leftRAverageSize == null || rightRAverageSize == null) {
+ return null;
+ }
+    ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>().
+            add(new Pair<Double,Double>(leftRCount,leftRAverageSize)).
+            add(new Pair<Double,Double>(rightRCount,rightRAverageSize)).
+ build();
+ final int parallelism = RelMetadataQuery.splitCount(join) == null
+ ? 1 : RelMetadataQuery.splitCount(join);
+ final double ioCost = HiveCostUtil.computeBucketMapJoinIOCost(relationInfos, streaming, parallelism);
+ // 4. Result
+ return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
+ }
+
+ private static RelOptCost computeCostSMBJoin(HiveJoin join) {
+ // 1. Sum of input cardinalities
+ final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
+ final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ if (leftRCount == null || rightRCount == null) {
+ return null;
+ }
+ final double rCount = leftRCount + rightRCount;
+ // 2. CPU cost = HashTable construction cost +
+ // join cost
+    ImmutableList<Double> cardinalities = new ImmutableList.Builder<Double>().
+ add(leftRCount).
+ add(rightRCount).
+ build();
+ ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
+ switch (join.getMapJoinStreamingSide()) {
+ case LEFT_RELATION:
+ streamingBuilder.set(0);
+ break;
+ case RIGHT_RELATION:
+ streamingBuilder.set(1);
+ break;
+ default:
+ return null;
+ }
+ ImmutableBitSet streaming = streamingBuilder.build();
+ final double cpuCost = HiveCostUtil.computeSMBMapJoinCPUCost(cardinalities);
+ // 3. IO cost = cost of transferring small tables to join node *
+ // degree of parallelism
+ final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
+ final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
+ if (leftRAverageSize == null || rightRAverageSize == null) {
+ return null;
+ }
+    ImmutableList<Pair<Double,Double>> relationInfos = new ImmutableList.Builder<Pair<Double,Double>>().
+            add(new Pair<Double,Double>(leftRCount,leftRAverageSize)).
+            add(new Pair<Double,Double>(rightRCount,rightRAverageSize)).
+ build();
+ final int parallelism = RelMetadataQuery.splitCount(join) == null
+ ? 1 : RelMetadataQuery.splitCount(join);
+ final double ioCost = HiveCostUtil.computeSMBMapJoinIOCost(relationInfos, streaming, parallelism);
+ // 4. Result
+ return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
+ }
+
+}
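For intuition (illustrative arithmetic, not part of the patch): with the constants introduced above in HiveCostUtil (CPU_COST = 1, NET_COST = 150, LOCAL_WRITE_COST = LOCAL_READ_COST = 600), computeSortIOCost charges cardinality * averageTupleSize * (600 + 600 + 150) per relation, so the aggregate path of this cost model prices an input of 1,000 rows averaging 100 bytes at 1,000 * 100 * 1,350 = 135,000,000 IO units, in addition to the sort CPU cost.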
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java (revision 0)
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdPercentageOriginalRows;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * HiveRelMdCost supplies the implementation of cost model.
+ */
+public class HiveRelMdCost {
+
+ private final HiveCostModel hiveCostModel;
+
+ public HiveRelMdCost(HiveCostModel hiveCostModel) {
+ this.hiveCostModel = hiveCostModel;
+ }
+
+ public RelMetadataProvider getMetadataProvider() {
+ return ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ ReflectiveRelMetadataProvider.reflectiveSource(this,
+ BuiltInMethod.NON_CUMULATIVE_COST.method),
+ RelMdPercentageOriginalRows.SOURCE));
+ }
+
+ public RelOptCost getNonCumulativeCost(HiveAggregate aggregate) {
+ return hiveCostModel.getAggregateCost(aggregate);
+ }
+
+ public RelOptCost getNonCumulativeCost(HiveJoin join) {
+ return hiveCostModel.getJoinCost(join);
+ }
+
+ // Default case
+ public RelOptCost getNonCumulativeCost(RelNode rel) {
+ return hiveCostModel.getDefaultCost();
+ }
+
+}
+
+// End HiveRelMdCost.java
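Illustrative sketch (not part of the patch): wiring the new handler into a cluster's metadata chain so that RelMetadataQuery.getNonCumulativeCost() reaches the configured cost model; cluster and planner are assumed to exist already.

    RelMetadataProvider hiveCost =
        new HiveRelMdCost(new HiveDefaultCostModel()).getMetadataProvider();
    RelMetadataProvider chained = ChainedRelMetadataProvider.of(
        ImmutableList.of(hiveCost, DefaultRelMetadataProvider.INSTANCE));
    cluster.setMetadataProvider(new CachingRelMetadataProvider(chained, planner));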
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java (revision 0)
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
+
+import java.util.EnumSet;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+
+/**
+ * Default implementation of the cost model.
+ * Currently used by MR and Spark execution engines.
+ */
+public class HiveDefaultCostModel extends HiveCostModel {
+
+ @Override
+ public RelOptCost getDefaultCost() {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public RelOptCost getAggregateCost(HiveAggregate aggregate) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+  protected EnumSet<JoinAlgorithm> getExecutableJoinAlgorithms(HiveJoin join) {
+ return EnumSet.of(JoinAlgorithm.NONE);
+ }
+
+ @Override
+ protected RelOptCost getJoinCost(HiveJoin join, JoinAlgorithm algorithm) {
+ RelOptCost algorithmCost;
+ switch (algorithm) {
+ case NONE:
+ algorithmCost = computeJoinCardinalityCost(join);
+ break;
+ default:
+ algorithmCost = null;
+ }
+ return algorithmCost;
+ }
+
+ private static RelOptCost computeJoinCardinalityCost(HiveJoin join) {
+ double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
+ double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0);
+ }
+
+}
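For intuition (illustrative, not part of the patch): under this default model the only executable algorithm is NONE and computeJoinCardinalityCost simply adds the two input row counts, so joining inputs of 1,000,000 and 10,000 rows yields a cost of 1,010,000 rows with zero CPU and IO; MR and Spark planning therefore keep the pre-existing cardinality-only behaviour.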
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java (working copy)
@@ -22,6 +22,7 @@
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.volcano.VolcanoPlanner;
import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext;
/**
* Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive.
@@ -34,12 +35,12 @@
private static final boolean ENABLE_COLLATION_TRAIT = true;
/** Creates a HiveVolcanoPlanner. */
- public HiveVolcanoPlanner() {
- super(HiveCost.FACTORY, null);
+ public HiveVolcanoPlanner(HiveConfigContext conf) {
+ super(HiveCost.FACTORY, conf);
}
- public static RelOptPlanner createPlanner() {
- final VolcanoPlanner planner = new HiveVolcanoPlanner();
+ public static RelOptPlanner createPlanner(HiveConfigContext conf) {
+ final VolcanoPlanner planner = new HiveVolcanoPlanner(conf);
planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
if (ENABLE_COLLATION_TRAIT) {
planner.addRelTraitDef(RelCollationTraitDef.INSTANCE);
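Illustrative sketch (not part of the patch): the planner is now seeded with the config context instead of a null context, so cost-related code can recover the HiveConf from the planner itself; conf is an assumed HiveConf instance, and reading it back through RelOptPlanner.getContext() is how a Calcite Context is normally consumed.

    RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(new HiveConfigContext(conf));
    HiveConf fromPlanner = planner.getContext().unwrap(HiveConf.class);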
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java (revision 1673283)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java (working copy)
@@ -54,11 +54,13 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
@@ -69,8 +71,8 @@
public class ASTConverter {
private static final Log LOG = LogFactory.getLog(ASTConverter.class);
- private RelNode root;
- private HiveAST hiveAST;
+ private final RelNode root;
+ private final HiveAST hiveAST;
private RelNode from;
private Filter where;
private Aggregate groupBy;
@@ -213,7 +215,7 @@
private void convertLimitToASTNode(HiveSort limit) {
if (limit != null) {
- HiveSort hiveLimit = (HiveSort) limit;
+ HiveSort hiveLimit = limit;
RexNode limitExpr = hiveLimit.getFetchExpr();
if (limitExpr != null) {
Object val = ((RexLiteral) limitExpr).getValue2();
@@ -224,12 +226,12 @@
private void convertOBToASTNode(HiveSort order) {
if (order != null) {
- HiveSort hiveSort = (HiveSort) order;
+ HiveSort hiveSort = order;
if (!hiveSort.getCollation().getFieldCollations().isEmpty()) {
// 1 Add order by token
ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
- schema = new Schema((HiveSort) hiveSort);
+ schema = new Schema(hiveSort);
Map<Integer, RexNode> obRefToCallMap = hiveSort.getInputRefToCallMap();
RexNode obExpr;
ASTNode astCol;
@@ -370,7 +372,7 @@
static class RexVisitor extends RexVisitorImpl<ASTNode> {
private final Schema schema;
- private boolean useTypeQualInLiteral;
+ private final boolean useTypeQualInLiteral;
protected RexVisitor(Schema schema) {
this(schema, false);
@@ -567,7 +569,7 @@
private static final long serialVersionUID = 1L;
Schema(TableScan scan) {
- String tabName = ((RelOptHiveTable) scan.getTable()).getTableAlias();
+ String tabName = ((HiveTableScan) scan).getTableAlias();
for (RelDataTypeField field : scan.getRowType().getFieldList()) {
add(new ColumnInfo(tabName, field.getName()));
}
@@ -641,7 +643,13 @@
add(new ColumnInfo(null, projName));
}
}
+
+  public Schema(String tabAlias, List<RelDataTypeField> fieldList) {
+ for (RelDataTypeField field : fieldList) {
+ add(new ColumnInfo(tabAlias, field.getName()));
}
+ }
+ }
/*
* represents Column information exposed by a QueryBlock.
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java (revision 0)
@@ -0,0 +1,891 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelDistribution;
+import org.apache.calcite.rel.RelDistribution.Type;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.logical.LogicalExchange;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.Pair;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.parse.JoinCond;
+import org.apache.hadoop.hive.ql.parse.JoinType;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
+import org.apache.hadoop.hive.ql.parse.PTFTranslator;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.UnparseTranslator;
+import org.apache.hadoop.hive.ql.parse.WindowingComponentizer;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.plan.UnionDesc;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+public class HiveOpConverter {
+
+ private static final Log LOG = LogFactory.getLog(HiveOpConverter.class);
+
+ public static enum HIVEAGGOPMODE {
+ NO_SKEW_NO_MAP_SIDE_AGG, // Corresponds to SemAnalyzer genGroupByPlan1MR
+ SKEW_NO_MAP_SIDE_AGG, // Corresponds to SemAnalyzer genGroupByPlan2MR
+ NO_SKEW_MAP_SIDE_AGG, // Corresponds to SemAnalyzer
+ // genGroupByPlanMapAggrNoSkew
+ SKEW_MAP_SIDE_AGG // Corresponds to SemAnalyzer genGroupByPlanMapAggr2MR
+ };
+
+ // TODO: remove this after stashing only rqd pieces from opconverter
+ private final SemanticAnalyzer semanticAnalyzer;
+ private final HiveConf hiveConf;
+ private final UnparseTranslator unparseTranslator;
+  private final Map<String, Operator<? extends OperatorDesc>> topOps;
+ private final boolean strictMode;
+ private int reduceSinkTagGenerator;
+
+ public HiveOpConverter(SemanticAnalyzer semanticAnalyzer, HiveConf hiveConf,
+      UnparseTranslator unparseTranslator, Map<String, Operator<? extends OperatorDesc>> topOps,
+ boolean strictMode) {
+ this.semanticAnalyzer = semanticAnalyzer;
+ this.hiveConf = hiveConf;
+ this.unparseTranslator = unparseTranslator;
+ this.topOps = topOps;
+ this.strictMode = strictMode;
+ this.reduceSinkTagGenerator = 0;
+ }
+
+ static class OpAttr {
+ final String tabAlias;
+    ImmutableList<Operator> inputs;
+    ImmutableMap<Integer, VirtualColumn> vcolMap;
+
+    OpAttr(String tabAlias, Map<Integer, VirtualColumn> vcolMap, Operator... inputs) {
+ this.tabAlias = tabAlias;
+ this.vcolMap = ImmutableMap.copyOf(vcolMap);
+ this.inputs = ImmutableList.copyOf(inputs);
+ }
+
+ private OpAttr clone(Operator... inputs) {
+ return new OpAttr(tabAlias, this.vcolMap, inputs);
+ }
+ }
+
+ public Operator convert(RelNode root) throws SemanticException {
+ OpAttr opAf = dispatch(root);
+ return opAf.inputs.get(0);
+ }
+
+ OpAttr dispatch(RelNode rn) throws SemanticException {
+ if (rn instanceof HiveTableScan) {
+ return visit((HiveTableScan) rn);
+ } else if (rn instanceof HiveProject) {
+ return visit((HiveProject) rn);
+ } else if (rn instanceof HiveJoin) {
+ return visit((HiveJoin) rn);
+ } else if (rn instanceof SemiJoin) {
+ SemiJoin sj = (SemiJoin) rn;
+ HiveJoin hj = HiveJoin.getJoin(sj.getCluster(), sj.getLeft(), sj.getRight(),
+ sj.getCondition(), sj.getJoinType(), true);
+ return visit(hj);
+ } else if (rn instanceof HiveFilter) {
+ return visit((HiveFilter) rn);
+ } else if (rn instanceof HiveSort) {
+ return visit((HiveSort) rn);
+ } else if (rn instanceof HiveUnion) {
+ return visit((HiveUnion) rn);
+ } else if (rn instanceof LogicalExchange) {
+ return visit((LogicalExchange) rn);
+ } else if (rn instanceof HiveAggregate) {
+ return visit((HiveAggregate) rn);
+ }
+    LOG.error(rn.getClass().getCanonicalName() + " operator translation not supported"
+ + " yet in return path.");
+ return null;
+ }
+
+ /**
+ * TODO: 1. PPD needs to get pushed in to TS
+ *
+ * @param scanRel
+ * @return
+ */
+ OpAttr visit(HiveTableScan scanRel) {
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName()
+ + " with row type: [" + scanRel.getRowType() + "]");
+ }
+
+ RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
+
+ // 1. Setup TableScan Desc
+ // 1.1 Build col details used by scan
+    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
+    List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>(ht.getVirtualCols());
+    Map<Integer, VirtualColumn> hiveScanVColMap = new HashMap<Integer, VirtualColumn>();
+    List<String> partColNames = new ArrayList<String>();
+    List<Integer> neededColumnIDs = new ArrayList<Integer>();
+    List<String> neededColumns = new ArrayList<String>();
+
+    Map<Integer, VirtualColumn> posToVColMap = HiveCalciteUtil.getVColsMap(virtualCols,
+        ht.getNoOfNonVirtualCols());
+    Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
+    Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
+    List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
+    List<String> scanColNames = scanRel.getRowType().getFieldNames();
+ String tableAlias = scanRel.getTableAlias();
+
+ String colName;
+ ColumnInfo colInfo;
+ VirtualColumn vc;
+ Integer posInRHT;
+
+ for (int i = 0; i < neededColIndxsFrmReloptHT.size(); i++) {
+ colName = scanColNames.get(i);
+ posInRHT = neededColIndxsFrmReloptHT.get(i);
+ if (posToVColMap.containsKey(posInRHT)) {
+ vc = posToVColMap.get(posInRHT);
+ virtualCols.add(vc);
+ colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
+ hiveScanVColMap.put(i, vc);
+ } else if (posToPartColInfo.containsKey(posInRHT)) {
+ partColNames.add(colName);
+ colInfo = posToPartColInfo.get(posInRHT);
+ } else {
+ colInfo = posToNonPartColInfo.get(posInRHT);
+ }
+ neededColumnIDs.add(posInRHT);
+ neededColumns.add(colName);
+ colInfos.add(colInfo);
+ }
+
+ // 1.2 Create TableScanDesc
+ TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
+
+ // 1.3. Set Partition cols in TSDesc
+ tsd.setPartColumns(partColNames);
+
+ // 1.4. Set needed cols in TSDesc
+ tsd.setNeededColumnIDs(neededColumnIDs);
+ tsd.setNeededColumns(neededColumns);
+
+ // 2. Setup TableScan
+ TableScanOperator ts = (TableScanOperator) OperatorFactory.get(tsd, new RowSchema(colInfos));
+
+ topOps.put(ht.getQBID(), ts);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
+ }
+
+ return new OpAttr(tableAlias, hiveScanVColMap, ts);
+ }
+
+ OpAttr visit(HiveProject projectRel) throws SemanticException {
+ OpAttr inputOpAf = dispatch(projectRel.getInput());
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Translating operator rel#" + projectRel.getId() + ":"
+ + projectRel.getRelTypeName() + " with row type: [" + projectRel.getRowType() + "]");
+ }
+
+ WindowingSpec windowingSpec = new WindowingSpec();
+    List<ExprNodeDesc> exprCols = new ArrayList<ExprNodeDesc>();
+ for (int pos = 0; pos < projectRel.getChildExps().size(); pos++) {
+ ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias, projectRel
+ .getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(),
+ projectRel.getRowType(), false, projectRel.getCluster().getTypeFactory());
+ exprCols.add(projectRel.getChildExps().get(pos).accept(converter));
+ if (converter.getWindowFunctionSpec() != null) {
+ windowingSpec.addWindowFunction(converter.getWindowFunctionSpec());
+ }
+ }
+ if (windowingSpec.getWindowExpressions() != null
+ && !windowingSpec.getWindowExpressions().isEmpty()) {
+ inputOpAf = genPTF(inputOpAf, windowingSpec);
+ }
+ // TODO: is this a safe assumption (name collision, external names...)
+    List<String> exprNames = new ArrayList<String>(projectRel.getRowType().getFieldNames());
+ SelectDesc sd = new SelectDesc(exprCols, exprNames);
+    Pair<ArrayList<ColumnInfo>, Map<Integer, VirtualColumn>> colInfoVColPair = createColInfos(
+ projectRel.getChildExps(), exprCols, exprNames, inputOpAf);
+ SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(sd, new RowSchema(
+ colInfoVColPair.getKey()), inputOpAf.inputs.get(0));
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + selOp + " with row schema: [" + selOp.getSchema() + "]");
+ }
+
+ return new OpAttr(inputOpAf.tabAlias, colInfoVColPair.getValue(), selOp);
+ }
+
+ OpAttr visit(HiveJoin joinRel) throws SemanticException {
+ // 1. Convert inputs
+ OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
+    List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
+ for (int i = 0; i < inputs.length; i++) {
+ inputs[i] = dispatch(joinRel.getInput(i));
+ children.add(inputs[i].inputs.get(0));
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName()
+ + " with row type: [" + joinRel.getRowType() + "]");
+ }
+
+ // 2. Convert join condition
+ JoinPredicateInfo joinPredInfo = JoinPredicateInfo.constructJoinPredicateInfo(joinRel);
+
+ // 3. Extract join keys from condition
+ ExprNodeDesc[][] joinKeys = extractJoinKeys(joinPredInfo, joinRel.getInputs());
+
+ // 4. Generate Join operator
+ JoinOperator joinOp = genJoin(joinRel, joinPredInfo, children, joinKeys);
+
+ // 5. TODO: Extract condition for non-equi join elements (if any) and
+ // add it
+
+ // 6. Virtual columns
+    Map<Integer, VirtualColumn> vcolMap = new HashMap<Integer, VirtualColumn>();
+ vcolMap.putAll(inputs[0].vcolMap);
+ if (extractJoinType(joinRel) != JoinType.LEFTSEMI) {
+ int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
+ for (int i = 1; i < inputs.length; i++) {
+ vcolMap.putAll(HiveCalciteUtil.shiftVColsMap(inputs[i].vcolMap, shift));
+ shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
+ }
+ }
+
+ // 8. Return result
+ return new OpAttr(null, vcolMap, joinOp);
+ }
+
+ OpAttr visit(HiveAggregate aggRel) throws SemanticException {
+ OpAttr inputOpAf = dispatch(aggRel.getInput());
+ return HiveGBOpConvUtil.translateGB(inputOpAf, aggRel, hiveConf);
+ }
+
+ OpAttr visit(HiveSort sortRel) throws SemanticException {
+ OpAttr inputOpAf = dispatch(sortRel.getInput());
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName()
+ + " with row type: [" + sortRel.getRowType() + "]");
+ if (sortRel.getCollation() == RelCollations.EMPTY) {
+ LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName()
+ + " consists of limit");
+ } else if (sortRel.fetch == null) {
+ LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName()
+ + " consists of sort");
+ } else {
+ LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName()
+ + " consists of sort+limit");
+ }
+ }
+
+    Operator<?> inputOp = inputOpAf.inputs.get(0);
+    Operator<?> resultOp = inputOpAf.inputs.get(0);
+ // 1. If we need to sort tuples based on the value of some
+ // of their columns
+ if (sortRel.getCollation() != RelCollations.EMPTY) {
+
+ // In strict mode, in the presence of order by, limit must be
+ // specified
+ if (strictMode && sortRel.fetch == null) {
+ throw new SemanticException(ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg());
+ }
+
+ // 1.a. Extract order for each column from collation
+ // Generate sortCols and order
+      List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
+ StringBuilder order = new StringBuilder();
+ for (RelCollation collation : sortRel.getCollationList()) {
+ for (RelFieldCollation sortInfo : collation.getFieldCollations()) {
+ int sortColumnPos = sortInfo.getFieldIndex();
+ ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature()
+ .get(sortColumnPos));
+ ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(),
+ columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol());
+ sortCols.add(sortColumn);
+ if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) {
+ order.append("-");
+ } else {
+ order.append("+");
+ }
+ }
+ }
+ // Use only 1 reducer for order by
+ int numReducers = 1;
+
+ // 1.b. Generate reduce sink and project operator
+ resultOp = genReduceSinkAndBacktrackSelect(resultOp,
+          sortCols.toArray(new ExprNodeDesc[sortCols.size()]), -1, new ArrayList<ExprNodeDesc>(),
+ order.toString(), numReducers, Operation.NOT_ACID, strictMode);
+ }
+
+ // 2. If we need to generate limit
+ if (sortRel.fetch != null) {
+ int limit = RexLiteral.intValue(sortRel.fetch);
+ LimitDesc limitDesc = new LimitDesc(limit);
+ // TODO: Set 'last limit' global property
+      ArrayList<ColumnInfo> cinfoLst = createColInfos(inputOp);
+ resultOp = OperatorFactory.getAndMakeChild(limitDesc,
+ new RowSchema(cinfoLst), resultOp);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
+ }
+ }
+
+ // 3. Return result
+ return inputOpAf.clone(resultOp);
+ }
+
+ /**
+ * TODO: 1) isSamplingPred 2) sampleDesc 3) isSortedFilter
+ */
+ OpAttr visit(HiveFilter filterRel) throws SemanticException {
+ OpAttr inputOpAf = dispatch(filterRel.getInput());
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Translating operator rel#" + filterRel.getId() + ":" + filterRel.getRelTypeName()
+ + " with row type: [" + filterRel.getRowType() + "]");
+ }
+
+ ExprNodeDesc filCondExpr = filterRel.getCondition().accept(
+ new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(), false,
+ filterRel.getCluster().getTypeFactory()));
+ FilterDesc filDesc = new FilterDesc(filCondExpr, false);
+ ArrayList<ColumnInfo> cinfoLst = createColInfos(inputOpAf.inputs.get(0));
+ FilterOperator filOp = (FilterOperator) OperatorFactory.getAndMakeChild(filDesc, new RowSchema(
+ cinfoLst), inputOpAf.inputs.get(0));
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + filOp + " with row schema: [" + filOp.getSchema() + "]");
+ }
+
+ return inputOpAf.clone(filOp);
+ }
+
+ OpAttr visit(HiveUnion unionRel) throws SemanticException {
+ // 1. Convert inputs
+ OpAttr[] inputs = new OpAttr[unionRel.getInputs().size()];
+ for (int i = 0; i < inputs.length; i++) {
+ inputs[i] = dispatch(unionRel.getInput(i));
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName()
+ + " with row type: [" + unionRel.getRowType() + "]");
+ }
+
+ // 2. Create a new union operator
+ UnionDesc unionDesc = new UnionDesc();
+ unionDesc.setNumInputs(inputs.length);
+ ArrayList<ColumnInfo> cinfoLst = createColInfos(inputs[0].inputs.get(0));
+ Operator<?>[] children = new Operator<?>[inputs.length];
+ for (int i = 0; i < children.length; i++) {
+ children[i] = inputs[i].inputs.get(0);
+ }
+ Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(unionDesc,
+ new RowSchema(cinfoLst), children);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]");
+ }
+
+ // 3. Return result
+ return inputs[0].clone(unionOp);
+ }
+
+ OpAttr visit(LogicalExchange exchangeRel) throws SemanticException {
+ OpAttr inputOpAf = dispatch(exchangeRel.getInput());
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":"
+ + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]");
+ }
+
+ RelDistribution distribution = exchangeRel.getDistribution();
+ if (distribution.getType() != Type.HASH_DISTRIBUTED) {
+ throw new SemanticException("Only hash distribution supported for LogicalExchange");
+ }
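+ // The hash distribution keys become the key expressions of the ReduceSink generated below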
+ ExprNodeDesc[] expressions = new ExprNodeDesc[distribution.getKeys().size()];
+ for (int i = 0; i < distribution.getKeys().size(); i++) {
+ int key = distribution.getKeys().get(i);
+ ColumnInfo colInfo = inputOpAf.inputs.get(0).getSchema().getSignature().get(key);
+ ExprNodeDesc column = new ExprNodeColumnDesc(colInfo);
+ expressions[i] = column;
+ }
+
+ ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), expressions,
+ reduceSinkTagGenerator++, -1, Operation.NOT_ACID, strictMode);
+
+ return inputOpAf.clone(rsOp);
+ }
+
+ private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticException {
+ Operator<?> input = inputOpAf.inputs.get(0);
+
+ wSpec.validateAndMakeEffective();
+ WindowingComponentizer groups = new WindowingComponentizer(wSpec);
+ RowResolver rr = new RowResolver();
+ for (ColumnInfo ci : input.getSchema().getSignature()) {
+ rr.put(ci.getTabAlias(), ci.getInternalName(), ci);
+ }
+
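+ // Each windowing group produced by the componentizer below gets its own RS + backtracking Select + PTF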
+ while (groups.hasNext()) {
+ wSpec = groups.next(hiveConf, semanticAnalyzer, unparseTranslator, rr);
+
+ // 1. Create RS and backtrack Select operator on top
+ ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
+ ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
+ StringBuilder order = new StringBuilder();
+
+ for (PartitionExpression partCol : wSpec.getQueryPartitionSpec().getExpressions()) {
+ ExprNodeDesc partExpr = semanticAnalyzer.genExprNodeDesc(partCol.getExpression(), rr);
+ if (ExprNodeDescUtils.indexOf(partExpr, partCols) < 0) {
+ keyCols.add(partExpr);
+ partCols.add(partExpr);
+ order.append('+');
+ }
+ }
+
+ if (wSpec.getQueryOrderSpec() != null) {
+ for (OrderExpression orderCol : wSpec.getQueryOrderSpec().getExpressions()) {
+ ExprNodeDesc orderExpr = semanticAnalyzer.genExprNodeDesc(orderCol.getExpression(), rr);
+ char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-';
+ int index = ExprNodeDescUtils.indexOf(orderExpr, keyCols);
+ if (index >= 0) {
+ order.setCharAt(index, orderChar);
+ continue;
+ }
+ keyCols.add(orderExpr);
+ order.append(orderChar);
+ }
+ }
+
+ SelectOperator selectOp = genReduceSinkAndBacktrackSelect(input,
+ keyCols.toArray(new ExprNodeDesc[keyCols.size()]), reduceSinkTagGenerator++, partCols,
+ order.toString(), -1, Operation.NOT_ACID, strictMode);
+
+ // 2. Finally create PTF
+ PTFTranslator translator = new PTFTranslator();
+ PTFDesc ptfDesc = translator.translate(wSpec, semanticAnalyzer, hiveConf, rr,
+ unparseTranslator);
+ RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
+
+ Operator<?> ptfOp = OperatorFactory.getAndMakeChild(ptfDesc,
+ new RowSchema(ptfOpRR.getColumnInfos()), selectOp);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + ptfOp + " with row schema: [" + ptfOp.getSchema() + "]");
+ }
+
+ // 3. Prepare for next iteration (if any)
+ rr = ptfOpRR;
+ input = ptfOp;
+ }
+
+ return inputOpAf.clone(input);
+ }
+
+ private ExprNodeDesc[][] extractJoinKeys(JoinPredicateInfo joinPredInfo, List<RelNode> inputs) {
+ ExprNodeDesc[][] joinKeys = new ExprNodeDesc[inputs.size()][];
+ for (int i = 0; i < inputs.size(); i++) {
+ joinKeys[i] = new ExprNodeDesc[joinPredInfo.getEquiJoinPredicateElements().size()];
+ for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
+ JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(j);
+ RexNode key = joinLeafPredInfo.getJoinKeyExprs(i).get(0);
+ joinKeys[i][j] = convertToExprNode(key, inputs.get(i), null);
+ }
+ }
+ return joinKeys;
+ }
+
+ private static SelectOperator genReduceSinkAndBacktrackSelect(Operator<?> input,
+ ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order,
+ int numReducers, Operation acidOperation, boolean strictMode) throws SemanticException {
+ // 1. Generate RS operator
+ ReduceSinkOperator rsOp = genReduceSink(input, keys, tag, partitionCols, order, numReducers,
+ acidOperation, strictMode);
+
+ // 2. Generate backtrack Select operator
+ Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(rsOp,
+ input);
+ SelectDesc selectDesc = new SelectDesc(new ArrayList<ExprNodeDesc>(descriptors.values()),
+ new ArrayList<String>(descriptors.keySet()));
+ ArrayList<ColumnInfo> cinfoLst = createColInfos(input);
+ SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc,
+ new RowSchema(cinfoLst), rsOp);
+ selectOp.setColumnExprMap(descriptors);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]");
+ }
+
+ return selectOp;
+ }
+
+ private static ReduceSinkOperator genReduceSink(Operator<?> input, ExprNodeDesc[] keys, int tag,
+ int numReducers, Operation acidOperation, boolean strictMode) throws SemanticException {
+ return genReduceSink(input, keys, tag, new ArrayList<ExprNodeDesc>(), "", numReducers,
+ acidOperation, strictMode);
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ private static ReduceSinkOperator genReduceSink(Operator<?> input, ExprNodeDesc[] keys, int tag,
+ ArrayList<ExprNodeDesc> partitionCols, String order, int numReducers,
+ Operation acidOperation, boolean strictMode) throws SemanticException {
+ Operator dummy = Operator.createDummy(); // dummy for backtracking
+ dummy.setParentOperators(Arrays.asList(input));
+
+ ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
+ ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
+
+ // Compute join keys and store in reduceKeys
+ for (ExprNodeDesc key : keys) {
+ reduceKeys.add(key);
+ reduceKeysBack.add(ExprNodeDescUtils.backtrack(key, dummy, input));
+ }
+
+ // Walk over the input schema and copy in the output
+ ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
+ ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+
+ List<ColumnInfo> inputColumns = input.getSchema().getSignature();
+ ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
+ List<String> outputColumnNames = new ArrayList<String>();
+ int[] index = new int[inputColumns.size()];
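+ // index[i] >= 0 means input column i is emitted as reduce key "reducesinkkey<index[i]>";
+ // a negative value encodes a reduce value column at position (-index[i] - 1)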
+ for (int i = 0; i < inputColumns.size(); i++) {
+ ColumnInfo colInfo = inputColumns.get(i);
+ String outputColName = colInfo.getInternalName();
+ ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo);
+
+ // backtrack can be null when input is script operator
+ ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, input);
+ int kindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
+ if (kindex >= 0) {
+ ColumnInfo newColInfo = new ColumnInfo(colInfo);
+ newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
+ newColInfo.setAlias(outputColName);
+ newColInfo.setTabAlias(colInfo.getTabAlias());
+ outputColumns.add(newColInfo);
+ index[i] = kindex;
+ continue;
+ }
+ int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
+ if (vindex >= 0) {
+ index[i] = -vindex - 1;
+ continue;
+ }
+ index[i] = -reduceValues.size() - 1;
+
+ reduceValues.add(expr);
+ reduceValuesBack.add(exprBack);
+
+ ColumnInfo newColInfo = new ColumnInfo(colInfo);
+ newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
+ newColInfo.setAlias(outputColName);
+ newColInfo.setTabAlias(colInfo.getTabAlias());
+
+ outputColumns.add(newColInfo);
+ outputColumnNames.add(outputColName);
+ }
+ dummy.setParentOperators(null);
+
+ // Use only 1 reducer if no reduce keys
+ if (reduceKeys.size() == 0) {
+ numReducers = 1;
+
+ // Cartesian product is not supported in strict mode
+ if (strictMode) {
+ throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg());
+ }
+ }
+
+ ReduceSinkDesc rsDesc;
+ if (order.isEmpty()) {
+ rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag,
+ reduceKeys.size(), numReducers, acidOperation);
+ } else {
+ rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag,
+ partitionCols, order, numReducers, acidOperation);
+ }
+
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc,
+ new RowSchema(outputColumns), input);
+
+ List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
+ for (int i = 0; i < keyColNames.size(); i++) {
+ colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
+ }
+ List<String> valColNames = rsDesc.getOutputValueColumnNames();
+ for (int i = 0; i < valColNames.size(); i++) {
+ colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
+ }
+
+ rsOp.setValueIndex(index);
+ rsOp.setColumnExprMap(colExprMap);
+ rsOp.setInputAliases(input.getSchema().getColumnNames()
+ .toArray(new String[input.getSchema().getColumnNames().size()]));
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + rsOp + " with row schema: [" + rsOp.getSchema() + "]");
+ }
+
+ return rsOp;
+ }
+
+ private static JoinOperator genJoin(HiveJoin hiveJoin, JoinPredicateInfo joinPredInfo,
+ List<Operator<?>> children, ExprNodeDesc[][] joinKeys) throws SemanticException {
+
+ // Extract join type
+ JoinType joinType = extractJoinType(hiveJoin);
+
+ // NOTE: Currently binary joins only
+ JoinCondDesc[] joinCondns = new JoinCondDesc[1];
+ joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
+
+ ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
+ ArrayList<String> outputColumnNames = new ArrayList<String>(hiveJoin.getRowType()
+ .getFieldNames());
+ Operator<?>[] childOps = new Operator[children.size()];
+
+ Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
+ HashMap<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
+
+ int outputPos = 0;
+ for (int pos = 0; pos < children.size(); pos++) {
+ ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
+ if (inputRS.getNumParent() != 1) {
+ throw new SemanticException("RS should have single parent");
+ }
+ Operator<?> parent = inputRS.getParentOperators().get(0);
+ ReduceSinkDesc rsDesc = inputRS.getConf();
+
+ int[] index = inputRS.getValueIndex();
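+ // Value index produced by genReduceSink: maps each parent column to its KEY/VALUE slot in the RS output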
+
+ Byte tag = (byte) rsDesc.getTag();
+
+ // Semijoin
+ if (joinType == JoinType.LEFTSEMI && pos != 0) {
+ exprMap.put(tag, new ArrayList<ExprNodeDesc>());
+ childOps[pos] = inputRS;
+ continue;
+ }
+
+ List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
+ List<String> valColNames = rsDesc.getOutputValueColumnNames();
+
+ posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
+
+ Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(outputPos,
+ outputColumnNames, keyColNames, valColNames, index, parent);
+
+ List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
+ for (int i = 0; i < index.length; i++) {
+ ColumnInfo info = new ColumnInfo(parentColumns.get(i));
+ info.setInternalName(outputColumnNames.get(outputPos));
+ outputColumns.add(info);
+ reversedExprs.put(outputColumnNames.get(outputPos), tag);
+ outputPos++;
+ }
+
+ exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
+ colExprMap.putAll(descriptors);
+ childOps[pos] = inputRS;
+ }
+
+ boolean noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER
+ && joinType != JoinType.RIGHTOUTER;
+ JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, joinKeys);
+ desc.setReversedExprs(reversedExprs);
+
+ JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc, new RowSchema(
+ outputColumns), childOps);
+ joinOp.setColumnExprMap(colExprMap);
+ joinOp.setPosToAliasMap(posToAliasMap);
+
+ // TODO: null safes?
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
+ }
+
+ return joinOp;
+ }
+
+ private static JoinType extractJoinType(HiveJoin join) {
+ // UNIQUE
+ if (join.isDistinct()) {
+ return JoinType.UNIQUE;
+ }
+ // SEMIJOIN
+ if (join.isLeftSemiJoin()) {
+ return JoinType.LEFTSEMI;
+ }
+ // OUTER AND INNER JOINS
+ JoinType resultJoinType;
+ switch (join.getJoinType()) {
+ case FULL:
+ resultJoinType = JoinType.FULLOUTER;
+ break;
+ case LEFT:
+ resultJoinType = JoinType.LEFTOUTER;
+ break;
+ case RIGHT:
+ resultJoinType = JoinType.RIGHTOUTER;
+ break;
+ default:
+ resultJoinType = JoinType.INNER;
+ break;
+ }
+ return resultJoinType;
+ }
+
+ private static Map<String, ExprNodeDesc> buildBacktrackFromReduceSink(ReduceSinkOperator rsOp,
+ Operator<?> inputOp) {
+ return buildBacktrackFromReduceSink(0, inputOp.getSchema().getColumnNames(), rsOp.getConf()
+ .getOutputKeyColumnNames(), rsOp.getConf().getOutputValueColumnNames(),
+ rsOp.getValueIndex(), inputOp);
+ }
+
+ private static Map<String, ExprNodeDesc> buildBacktrackFromReduceSink(int initialPos,
+ List<String> outputColumnNames, List<String> keyColNames, List<String> valueColNames,
+ int[] index, Operator<?> inputOp) {
+ Map<String, ExprNodeDesc> columnDescriptors = new LinkedHashMap<String, ExprNodeDesc>();
+ for (int i = 0; i < index.length; i++) {
+ ColumnInfo info = new ColumnInfo(inputOp.getSchema().getSignature().get(i));
+ String field;
+ if (index[i] >= 0) {
+ field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
+ } else {
+ field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
+ }
+ ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(),
+ info.getIsVirtualCol());
+ columnDescriptors.put(outputColumnNames.get(initialPos + i), desc);
+ }
+ return columnDescriptors;
+ }
+
+ private static ExprNodeDesc convertToExprNode(RexNode rn, RelNode inputRel, String tabAlias) {
+ return rn.accept(new ExprNodeConverter(tabAlias, inputRel.getRowType(), false,
+ inputRel.getCluster().getTypeFactory()));
+ }
+
+ private static ArrayList<ColumnInfo> createColInfos(Operator<?> input) {
+ ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
+ for (ColumnInfo ci : input.getSchema().getSignature()) {
+ cInfoLst.add(new ColumnInfo(ci));
+ }
+ return cInfoLst;
+ }
+
+ private static Pair<ArrayList<ColumnInfo>, Map<Integer, VirtualColumn>> createColInfos(
+ List<RexNode> calciteExprs, List<ExprNodeDesc> hiveExprs, List<String> projNames,
+ OpAttr inpOpAf) {
+ if (hiveExprs.size() != projNames.size()) {
+ throw new RuntimeException("Column expressions list doesn't match Column Names list");
+ }
+
+ RexNode rexN;
+ ExprNodeDesc pe;
+ ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
+ VirtualColumn vc;
+ Map<Integer, VirtualColumn> newVColMap = new HashMap<Integer, VirtualColumn>();
+ for (int i = 0; i < hiveExprs.size(); i++) {
+ pe = hiveExprs.get(i);
+ rexN = calciteExprs.get(i);
+ vc = null;
+ if (rexN instanceof RexInputRef) {
+ vc = inpOpAf.vcolMap.get(((RexInputRef) rexN).getIndex());
+ if (vc != null) {
+ newVColMap.put(i, vc);
+ }
+ }
+ colInfos
+ .add(new ColumnInfo(projNames.get(i), pe.getTypeInfo(), inpOpAf.tabAlias, vc != null));
+ }
+
+ return new Pair<ArrayList<ColumnInfo>, Map<Integer, VirtualColumn>>(colInfos, newVColMap);
+ }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java (revision 0)
@@ -0,0 +1,1237 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * TODO:
+ * 1. Change the output col/ExprNodeColumn names to external names.
+ * 2. Verify if we need to use the "KEY."/"VALUE." in RS cols; switch to
+ * external names if possible.
+ * 3. In ExprNode & in ColumnInfo the tableAlias/VirtualColumn is specified
+ * differently for different GB/RS in pipeline. Remove the different treatments.
+ * 3. VirtualColMap needs to be maintained
+ *
+ */
+public class HiveGBOpConvUtil {
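+ // Physical shapes of the GB pipeline, derived from the HIVEMAPSIDEAGGREGATE and HIVEGROUPBYSKEW
+ // settings and from whether grouping sets force an additional MR job (see getAggOPMode below)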
+ private static enum HIVEGBPHYSICALMODE {
+ MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB,
+ MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB,
+ MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT,
+ MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT,
+ NO_MAP_SIDE_GB_NO_SKEW,
+ NO_MAP_SIDE_GB_SKEW
+ };
+
+ private static class UDAFAttrs {
+ private boolean isDistinctUDAF;
+ private String udafName;
+ private GenericUDAFEvaluator udafEvaluator;
+ private ArrayList<ExprNodeDesc> udafParams = new ArrayList<ExprNodeDesc>();
+ private List<Integer> udafParamsIndxInGBInfoDistExprs = new ArrayList<Integer>();
+ };
+
+ private static class GBInfo {
+ private List<String> outputColNames = new ArrayList<String>();
+
+ private List<String> gbKeyColNamesInInput = new ArrayList<String>();
+ private List<TypeInfo> gbKeyTypes = new ArrayList<TypeInfo>();
+ private List<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
+
+ private List<Integer> grpSets = new ArrayList<Integer>();
+ private boolean grpSetRqrAdditionalMRJob;
+ private boolean grpIdFunctionNeeded;
+
+ private List<String> distExprNames = new ArrayList<String>();
+ private List<TypeInfo> distExprTypes = new ArrayList<TypeInfo>();
+ private List<ExprNodeDesc> distExprNodes = new ArrayList<ExprNodeDesc>();
+ private List<List<Integer>> distColIndices = new ArrayList<List<Integer>>();
+
+ private List<ExprNodeDesc> deDupedNonDistIrefs = new ArrayList<ExprNodeDesc>();
+
+ private List<UDAFAttrs> udafAttrs = new ArrayList<UDAFAttrs>();
+ private boolean containsDistinctAggr = false;
+
+ float groupByMemoryUsage;
+ float memoryThreshold;
+
+ private HIVEGBPHYSICALMODE gbPhysicalPipelineMode;
+ };
+
+ private static HIVEGBPHYSICALMODE getAggOPMode(HiveConf hc, GBInfo gbInfo) {
+ HIVEGBPHYSICALMODE gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB;
+
+ if (hc.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
+ if (!hc.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+ if (!gbInfo.grpSetRqrAdditionalMRJob) {
+ gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB;
+ } else {
+ gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB;
+ }
+ } else {
+ if (gbInfo.containsDistinctAggr || !gbInfo.gbKeys.isEmpty()) {
+ gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT;
+ } else {
+ gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT;
+ }
+ }
+ } else {
+ if (!hc.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+ gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW;
+ } else {
+ gbPhysicalPipelineMode = HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_SKEW;
+ }
+ }
+
+ return gbPhysicalPipelineMode;
+ }
+
+ // For each GB op in the logical GB this should be called separately;
+ // otherwise GBevaluator and expr nodes may get shared among multiple GB ops
+ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf hc) {
+ GBInfo gbInfo = new GBInfo();
+
+ // 0. Collect AggRel output col Names
+ gbInfo.outputColNames.addAll(aggRel.getRowType().getFieldNames());
+
+ // 1. Collect GB Keys
+ RelNode aggInputRel = aggRel.getInput();
+ ExprNodeConverter exprConv = new ExprNodeConverter(inputOpAf.tabAlias,
+ aggInputRel.getRowType(), false, aggRel.getCluster().getTypeFactory());
+
+ ExprNodeDesc tmpExprNodeDesc;
+ for (int i : aggRel.getGroupSet()) {
+ RexInputRef iRef = new RexInputRef(i, (RelDataType) aggInputRel.getRowType().getFieldList()
+ .get(i).getType());
+ tmpExprNodeDesc = iRef.accept(exprConv);
+ gbInfo.gbKeys.add(tmpExprNodeDesc);
+ gbInfo.gbKeyColNamesInInput.add(aggInputRel.getRowType().getFieldNames().get(i));
+ gbInfo.gbKeyTypes.add(tmpExprNodeDesc.getTypeInfo());
+ }
+
+ // 2. Collect Grouping Set info
+ if (aggRel.indicator) {
+ // 2.1 Translate Grouping set col bitset
+ ImmutableList<ImmutableBitSet> lstGrpSet = aggRel.getGroupSets();
+ int bitmap = 0;
+ for (ImmutableBitSet grpSet : lstGrpSet) {
+ bitmap = 0;
+ for (Integer bitIdx : grpSet.asList()) {
+ bitmap = SemanticAnalyzer.setBit(bitmap, bitIdx);
+ }
+ gbInfo.grpSets.add(bitmap);
+ }
+ Collections.sort(gbInfo.grpSets);
+
+ // 2.2 Check if grouping sets require an additional MR job
+ gbInfo.grpSetRqrAdditionalMRJob = gbInfo.grpSets.size() > hc
+ .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY);
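+ // An extra MR job is needed once the number of grouping sets exceeds
+ // HIVE_NEW_JOB_GROUPING_SET_CARDINALITY, since each input row is expanded once per grouping set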
+
+ // 2.3 Check if GROUPING_ID needs to be projected out
+ if (!aggRel.getAggCallList().isEmpty()
+ && (aggRel.getAggCallList().get(aggRel.getAggCallList().size() - 1).getAggregation() == HiveGroupingID.INSTANCE)) {
+ gbInfo.grpIdFunctionNeeded = true;
+ }
+ }
+
+ // 3. Walk through UDAF & Collect Distinct Info
+ Set<Integer> distinctRefs = new HashSet<Integer>();
+ Map<Integer, Integer> distParamInRefsToOutputPos = new HashMap<Integer, Integer>();
+ for (AggregateCall aggCall : aggRel.getAggCallList()) {
+ if ((aggCall.getAggregation() == HiveGroupingID.INSTANCE) || !aggCall.isDistinct()) {
+ continue;
+ }
+
+ List<Integer> argLst = new ArrayList<Integer>(aggCall.getArgList());
+ List<String> argNames = HiveCalciteUtil.getFieldNames(argLst, aggInputRel);
+ ExprNodeDesc distinctExpr;
+ for (int i = 0; i < argLst.size(); i++) {
+ if (!distinctRefs.contains(argLst.get(i))) {
+ distinctRefs.add(argLst.get(i));
+ distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
+ distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv);
+ gbInfo.distExprNodes.add(distinctExpr);
+ gbInfo.distExprNames.add(argNames.get(i));
+ gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
+ }
+ }
+ }
+
+ // 4. Walk through UDAF & Collect UDAF Info
+ Set<Integer> deDupedNonDistIrefsSet = new HashSet<Integer>();
+ for (AggregateCall aggCall : aggRel.getAggCallList()) {
+ if (aggCall.getAggregation() == HiveGroupingID.INSTANCE) {
+ continue;
+ }
+
+ UDAFAttrs udafAttrs = new UDAFAttrs();
+ udafAttrs.udafParams.addAll(HiveCalciteUtil.getExprNodes(aggCall.getArgList(), aggInputRel,
+ inputOpAf.tabAlias));
+ udafAttrs.udafName = aggCall.getAggregation().getName();
+ udafAttrs.isDistinctUDAF = aggCall.isDistinct();
+ List<Integer> argLst = new ArrayList<Integer>(aggCall.getArgList());
+ List<Integer> distColIndicesOfUDAF = new ArrayList<Integer>();
+ List<Integer> distUDAFParamsIndxInDistExprs = new ArrayList<Integer>();
+ for (int i = 0; i < argLst.size(); i++) {
+ // NOTE: a distinct expr cannot be part of the GB key (we assume plan
+ // gen would have prevented it)
+ if (udafAttrs.isDistinctUDAF) {
+ distColIndicesOfUDAF.add(distParamInRefsToOutputPos.get(argLst.get(i)));
+ distUDAFParamsIndxInDistExprs.add(distParamInRefsToOutputPos.get(argLst.get(i)));
+ } else {
+ // TODO: this seems wrong (following what Hive Regular does)
+ if (!distParamInRefsToOutputPos.containsKey(argLst.get(i))
+ && !deDupedNonDistIrefsSet.contains(argLst.get(i))) {
+ deDupedNonDistIrefsSet.add(argLst.get(i));
+ gbInfo.deDupedNonDistIrefs.add(udafAttrs.udafParams.get(i));
+ }
+ }
+ }
+
+ if (udafAttrs.isDistinctUDAF) {
+ gbInfo.containsDistinctAggr = true;
+
+ udafAttrs.udafParamsIndxInGBInfoDistExprs = distUDAFParamsIndxInDistExprs;
+ gbInfo.distColIndices.add(distColIndicesOfUDAF);
+ }
+ try {
+ udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
+ new ArrayList<ExprNodeDesc>(udafAttrs.udafParams), new ASTNode(),
+ udafAttrs.isDistinctUDAF, false);
+ } catch (SemanticException e) {
+ throw new RuntimeException(e);
+ }
+ gbInfo.udafAttrs.add(udafAttrs);
+ }
+
+ // 5. Gather GB memory threshold
+ gbInfo.groupByMemoryUsage = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
+ gbInfo.memoryThreshold = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
+
+ // 6. Gather GB physical pipeline (based on user config & grouping sets size)
+ gbInfo.gbPhysicalPipelineMode = getAggOPMode(hc, gbInfo);
+
+ return gbInfo;
+ }
+
+ static OpAttr translateGB(OpAttr inputOpAf, HiveAggregate aggRel, HiveConf hc)
+ throws SemanticException {
+ OpAttr translatedGBOpAttr = null;
+ GBInfo gbInfo = getGBInfo(aggRel, inputOpAf, hc);
+
+ switch (gbInfo.gbPhysicalPipelineMode) {
+ case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB:
+ translatedGBOpAttr = genMapSideGBNoSkewNoAddMRJob(inputOpAf, aggRel, gbInfo);
+ break;
+ case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
+ translatedGBOpAttr = genMapSideGBNoSkewAddMRJob(inputOpAf, aggRel, gbInfo);
+ break;
+ case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
+ translatedGBOpAttr = genMapSideGBSkewGBKeysOrDistUDAFPresent(inputOpAf, aggRel, gbInfo);
+ break;
+ case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT:
+ translatedGBOpAttr = genMapSideGBSkewGBKeysAndDistUDAFNotPresent(inputOpAf, aggRel, gbInfo);
+ break;
+ case NO_MAP_SIDE_GB_NO_SKEW:
+ translatedGBOpAttr = genNoMapSideGBNoSkew(inputOpAf, aggRel, gbInfo);
+ break;
+ case NO_MAP_SIDE_GB_SKEW:
+ translatedGBOpAttr = genNoMapSideGBSkew(inputOpAf, aggRel, gbInfo);
+ break;
+ }
+
+ return translatedGBOpAttr;
+ }
+
+ /**
+ * GB-RS-GB1
+ *
+ * Construct the GB-RS-GB pipeline: the user has enabled map-side GB, specified no
+ * skew, and the number of grouping sets is below the threshold.
+ *
+ * @param inputOpAf
+ * @param aggRel
+ * @param gbInfo
+ * @return
+ * @throws SemanticException
+ */
+ private static OpAttr genMapSideGBNoSkewNoAddMRJob(OpAttr inputOpAf, HiveAggregate aggRel,
+ GBInfo gbInfo) throws SemanticException {
+ OpAttr mapSideGB = null;
+ OpAttr mapSideRS = null;
+ OpAttr reduceSideGB = null;
+
+ // 1. Insert MapSide GB
+ mapSideGB = genMapSideGB(inputOpAf, gbInfo);
+
+ // 2. Insert MapSide RS
+ mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
+
+ // 3. Insert ReduceSide GB
+ reduceSideGB = genReduceSideGB1(mapSideRS, gbInfo, false, false, GroupByDesc.Mode.MERGEPARTIAL);
+
+ return reduceSideGB;
+ }
+
+ /**
+ * GB-RS-GB1-RS-GB2
+ */
+ private static OpAttr genGBRSGBRSGBOpPipeLine(OpAttr inputOpAf, HiveAggregate aggRel,
+ GBInfo gbInfo) throws SemanticException {
+ OpAttr mapSideGB = null;
+ OpAttr mapSideRS = null;
+ OpAttr reduceSideGB1 = null;
+ OpAttr reduceSideRS = null;
+ OpAttr reduceSideGB2 = null;
+
+ // 1. Insert MapSide GB
+ mapSideGB = genMapSideGB(inputOpAf, gbInfo);
+
+ // 2. Insert MapSide RS
+ mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
+
+ // 3. Insert ReduceSide GB1
+ boolean computeGrpSet = gbInfo.gbPhysicalPipelineMode != HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT;
+ reduceSideGB1 = genReduceSideGB1(mapSideRS, gbInfo, computeGrpSet, false, GroupByDesc.Mode.PARTIALS);
+
+ // 4. Insert RS on reduce side with Reduce side GB as input
+ reduceSideRS = genReduceGBRS(reduceSideGB1, gbInfo);
+
+ // 5. Insert ReduceSide GB2
+ reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo);
+
+ return reduceSideGB2;
+ }
+
+ /**
+ * GB-RS-GB1-RS-GB2
+ *
+ * @param inputOpAf
+ * @param aggRel
+ * @param gbInfo
+ * @return
+ * @throws SemanticException
+ */
+ private static OpAttr genMapSideGBNoSkewAddMRJob(OpAttr inputOpAf, HiveAggregate aggRel,
+ GBInfo gbInfo) throws SemanticException {
+ // 1. Sanity check
+ if (gbInfo.containsDistinctAggr) {
+ String errorMsg = "The number of rows per input row due to grouping sets is "
+ + gbInfo.grpSets.size();
+ throw new SemanticException(
+ ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_DISTINCTS.getMsg(errorMsg));
+ }
+
+ // 2. Gen GB-RS-GB-RS-GB pipeline
+ return genGBRSGBRSGBOpPipeLine(inputOpAf, aggRel, gbInfo);
+ }
+
+ /**
+ * GB-RS-GB1-RS-GB2
+ *
+ * @param inputOpAf
+ * @param aggRel
+ * @param gbInfo
+ * @return
+ * @throws SemanticException
+ */
+ private static OpAttr genMapSideGBSkewGBKeysOrDistUDAFPresent(OpAttr inputOpAf,
+ HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
+ // 1. Sanity check
+ if (gbInfo.grpSetRqrAdditionalMRJob) {
+ String errorMsg = "The number of rows per input row due to grouping sets is "
+ + gbInfo.grpSets.size();
+ throw new SemanticException(
+ ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
+ }
+
+ // 2. Gen GB-RS-GB-RS-GB pipeline
+ return genGBRSGBRSGBOpPipeLine(inputOpAf, aggRel, gbInfo);
+ }
+
+ /**
+ * GB-RS-GB2
+ *
+ * @param inputOpAf
+ * @param aggRel
+ * @param gbInfo
+ * @return
+ * @throws SemanticException
+ */
+ private static OpAttr genMapSideGBSkewGBKeysAndDistUDAFNotPresent(OpAttr inputOpAf,
+ HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
+ OpAttr mapSideGB = null;
+ OpAttr mapSideRS = null;
+ OpAttr reduceSideGB2 = null;
+
+ // 1. Sanity check
+ if (gbInfo.grpSetRqrAdditionalMRJob) {
+ String errorMsg = "The number of rows per input row due to grouping sets is "
+ + gbInfo.grpSets.size();
+ throw new SemanticException(
+ ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
+ }
+
+ // 1. Insert MapSide GB
+ mapSideGB = genMapSideGB(inputOpAf, gbInfo);
+
+ // 2. Insert MapSide RS
+ mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
+
+ // 3. Insert ReduceSide GB2
+ reduceSideGB2 = genReduceSideGB2(mapSideRS, gbInfo);
+
+ return reduceSideGB2;
+ }
+
+ /**
+ * RS-GB1
+ *
+ * @param inputOpAf
+ * @param aggRel
+ * @param gbInfo
+ * @return
+ * @throws SemanticException
+ */
+ private static OpAttr genNoMapSideGBNoSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo)
+ throws SemanticException {
+ OpAttr mapSideRS = null;
+ OpAttr reduceSideGB1NoMapGB = null;
+
+ // 1. Insert MapSide RS
+ mapSideRS = genMapSideRS(inputOpAf, gbInfo);
+
+ // 2. Insert ReduceSide GB
+ reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.COMPLETE);
+
+ return reduceSideGB1NoMapGB;
+ }
+
+ /**
+ * RS-GB1-RS-GB2
+ *
+ * @param inputOpAf
+ * @param aggRel
+ * @param gbInfo
+ * @return
+ * @throws SemanticException
+ */
+ private static OpAttr genNoMapSideGBSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo)
+ throws SemanticException {
+ OpAttr mapSideRS = null;
+ OpAttr reduceSideGB1NoMapGB = null;
+ OpAttr reduceSideRS = null;
+ OpAttr reduceSideGB2 = null;
+
+ // 1. Insert MapSide RS
+ mapSideRS = genMapSideRS(inputOpAf, gbInfo);
+
+ // 2. Insert ReduceSide GB
+ reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.PARTIAL1);
+
+ // 3. Insert RS on reduce side with Reduce side GB as input
+ reduceSideRS = genReduceGBRS(reduceSideGB1NoMapGB, gbInfo);
+
+ // 4. Insert ReduceSide GB2
+ reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo);
+
+ return reduceSideGB2;
+ }
+
+ private static int getParallelismForReduceSideRS(GBInfo gbInfo) {
+ int degreeOfParallelism = 0;
+
+ switch (gbInfo.gbPhysicalPipelineMode) {
+ case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
+ case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
+ case NO_MAP_SIDE_GB_SKEW:
+ if (gbInfo.gbKeys.isEmpty()) {
+ degreeOfParallelism = 1;
+ } else {
+ degreeOfParallelism = -1;
+ }
+ break;
+ default:
+ throw new RuntimeException(
+ "Unable to determine Reducer Parallelism - Invalid Physical Mode: "
+ + gbInfo.gbPhysicalPipelineMode);
+ }
+
+ return degreeOfParallelism;
+ }
+
+ private static int getParallelismForMapSideRS(GBInfo gbInfo) {
+ int degreeOfParallelism = 0;
+
+ switch (gbInfo.gbPhysicalPipelineMode) {
+ case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB:
+ case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
+ case NO_MAP_SIDE_GB_NO_SKEW:
+ if (gbInfo.gbKeys.isEmpty()) {
+ degreeOfParallelism = 1;
+ } else {
+ degreeOfParallelism = -1;
+ }
+ break;
+ case NO_MAP_SIDE_GB_SKEW:
+ case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
+ degreeOfParallelism = -1;
+ break;
+ case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT:
+ degreeOfParallelism = 1;
+ break;
+ default:
+ throw new RuntimeException(
+ "Unable to determine Reducer Parallelism - Invalid Physical Mode: "
+ + gbInfo.gbPhysicalPipelineMode);
+ }
+
+ return degreeOfParallelism;
+ }
+
+ private static int getNumPartFieldsForReduceSideRS(GBInfo gbInfo) {
+ int numPartFields = 0;
+
+ switch (gbInfo.gbPhysicalPipelineMode) {
+ case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
+ numPartFields = gbInfo.gbKeys.size() + 1;
+ break;
+ case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
+ case NO_MAP_SIDE_GB_SKEW:
+ numPartFields = gbInfo.gbKeys.size();
+ break;
+ default:
+ throw new RuntimeException(
+ "Unable to determine Number of Partition Fields - Invalid Physical Mode: "
+ + gbInfo.gbPhysicalPipelineMode);
+ }
+
+ return numPartFields;
+ }
+
+ private static int getNumPartFieldsForMapSideRS(GBInfo gbInfo) {
+ int numPartFields = 0;
+
+ switch (gbInfo.gbPhysicalPipelineMode) {
+ case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB:
+ case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
+ case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT:
+ case NO_MAP_SIDE_GB_NO_SKEW:
+ numPartFields += gbInfo.gbKeys.size();
+ break;
+ case NO_MAP_SIDE_GB_SKEW:
+ case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
+ if (gbInfo.containsDistinctAggr) {
+ numPartFields = Integer.MAX_VALUE;
+ } else {
+ numPartFields = -1;
+ }
+ break;
+ default:
+ throw new RuntimeException(
+ "Unable to determine Number of Partition Fields - Invalid Physical Mode: "
+ + gbInfo.gbPhysicalPipelineMode);
+ }
+
+ return numPartFields;
+ }
+
+ private static boolean inclGrpSetInReduceSide(GBInfo gbInfo) {
+ boolean inclGrpSet = false;
+
+ if (gbInfo.grpSets.size() > 0
+ && (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB || gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT)) {
+ inclGrpSet = true;
+ }
+
+ return inclGrpSet;
+ }
+
+ private static boolean inclGrpSetInMapSide(GBInfo gbInfo) {
+ boolean inclGrpSet = false;
+
+ if (gbInfo.grpSets.size() > 0
+ && ((gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB) ||
+ gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT)) {
+ inclGrpSet = true;
+ }
+
+ return inclGrpSet;
+ }
+
+ private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ ArrayList<String> outputColumnNames = new ArrayList<String>();
+ ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
+ GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0);
+ List<ColumnInfo> gb1ColInfoLst = reduceSideGB1.getSchema().getSignature();
+
+ ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(reduceSideGB1, 0,
+ gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap, true, true);
+ if (inclGrpSetInReduceSide(gbInfo)) {
+ addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true, reduceKeys,
+ outputColumnNames, colInfoLst, colExprMap);
+ }
+
+ ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(reduceSideGB1, reduceSideGB1.getConf()
+ .getKeys().size(), outputColumnNames, colInfoLst, colExprMap, true, true);
+
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils
+ .getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1,
+ getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo),
+ AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), reduceSideGB1);
+
+ rsOp.setColumnExprMap(colExprMap);
+
+ return new OpAttr("", new HashMap(), rsOp);
+ }
+
+ private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ List<String> outputKeyColumnNames = new ArrayList<String>();
+ List<String> outputValueColumnNames = new ArrayList<String>();
+ ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
+ GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);
+ int distColStartIndx = gbInfo.gbKeys.size() + (gbInfo.grpSets.size() > 0 ? 1 : 0);
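+ // Distinct expressions are appended to the RS key after the GB keys (and the grouping-set column, if present)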
+
+ ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1,
+ outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
+ int keyLength = reduceKeys.size();
+
+ if (inclGrpSetInMapSide(gbInfo)) {
+ addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true,
+ reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap);
+ keyLength++;
+ }
+ if (mapGB.getConf().getKeys().size() > reduceKeys.size()) {
+ // NOTE: All dist cols have single output col name;
+ reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys()
+ .size() - 1, outputKeyColumnNames, true, colInfoLst, colExprMap, false, false));
+ }
+
+ ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys()
+ .size(), outputValueColumnNames, colInfoLst, colExprMap, false, false);
+ List<List<Integer>> distinctColIndices = getDistColIndices(gbInfo, distColStartIndx);
+
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils
+ .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, distinctColIndices,
+ outputKeyColumnNames, outputValueColumnNames, true, -1,
+ getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo),
+ AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB);
+
+ rsOp.setColumnExprMap(colExprMap);
+
+ return new OpAttr("", new HashMap(), rsOp);
+ }
+
+ private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ List<String> outputKeyColumnNames = new ArrayList<String>();
+ List<String> outputValueColumnNames = new ArrayList<String>();
+ ArrayList