diff --git a/data/conf/spark/standalone/hive-site.xml b/data/conf/spark/standalone/hive-site.xml
index 016f568..abc73f8 100644
--- a/data/conf/spark/standalone/hive-site.xml
+++ b/data/conf/spark/standalone/hive-site.xml
@@ -230,4 +230,10 @@
<value>hive_admin_user</value>
+<property>
+  <name>hive.in.test</name>
+  <value>true</value>
+  <description>Internal marker for test. Used for masking env-dependent values</description>
+</property>
+
diff --git a/data/conf/spark/yarn-client/hive-site.xml b/data/conf/spark/yarn-client/hive-site.xml
index 39ba20e..b7211ee 100644
--- a/data/conf/spark/yarn-client/hive-site.xml
+++ b/data/conf/spark/yarn-client/hive-site.xml
@@ -250,4 +250,10 @@
<value>hive_admin_user</value>
+<property>
+  <name>hive.in.test</name>
+  <value>true</value>
+  <description>Internal marker for test. Used for masking env-dependent values</description>
+</property>
+
diff --git a/ql/src/test/queries/clientpositive/cbo_subq_in.q b/ql/src/test/queries/clientpositive/cbo_subq_in.q
index 7a0bfba..0a25b9c 100644
--- a/ql/src/test/queries/clientpositive/cbo_subq_in.q
+++ b/ql/src/test/queries/clientpositive/cbo_subq_in.q
@@ -29,6 +29,7 @@ select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
;
-- where and having
diff --git a/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q b/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q
index b7e1bf1..0a1f137 100644
--- a/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q
+++ b/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q
@@ -7,12 +7,12 @@ CREATE TABLE DEST2(key MAP<STRING, STRING>, value BIGINT) STORED AS TEXTFILE;
EXPLAIN
FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10;
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10;
FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10;
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10;
SELECT DEST1.* FROM DEST1;
SELECT DEST2.* FROM DEST2;
diff --git a/ql/src/test/results/clientpositive/cbo_subq_in.q.out b/ql/src/test/results/clientpositive/cbo_subq_in.q.out
index c1f3de7..f6bfad2 100644
--- a/ql/src/test/results/clientpositive/cbo_subq_in.q.out
+++ b/ql/src/test/results/clientpositive/cbo_subq_in.q.out
@@ -69,6 +69,7 @@ select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@lineitem
#### A masked pattern was here ####
@@ -77,6 +78,7 @@ select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@lineitem
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out
index 4611f60..0564056 100644
--- a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out
@@ -20,13 +20,13 @@ POSTHOOK: Output: database:default
POSTHOOK: Output: default@DEST2
PREHOOK: query: EXPLAIN
FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -84,29 +84,27 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: array<string>), _col1 (type: bigint)
+ key expressions: _col0 (type: array<string>)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: array<string>), VALUE._col1 (type: bigint)
+ expressions: KEY.reducesinkkey0 (type: array<string>), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -149,29 +147,27 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: map<string,string>), _col1 (type: bigint)
+ key expressions: _col0 (type: map<string,string>)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: map<string,string>), VALUE._col1 (type: bigint)
+ expressions: KEY.reducesinkkey0 (type: map<string,string>), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -198,15 +194,15 @@ STAGE PLANS:
Stats-Aggr Operator
PREHOOK: query: FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@dest1
PREHOOK: Output: default@dest2
POSTHOOK: query: FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
index 032926d..8955a61 100644
--- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
@@ -173,12 +173,16 @@ STAGE PLANS:
Filter Operator
predicate: deptid is not null (type: boolean)
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int)
- sort order: +
- Map-reduce partition columns: deptid (type: int)
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: lastname (type: string), locid (type: int)
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col2 (type: int)
Map 3
Map Operator Tree:
TableScan
@@ -187,33 +191,33 @@ STAGE PLANS:
Filter Operator
predicate: deptid is not null (type: boolean)
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int)
- sort order: +
- Map-reduce partition columns: deptid (type: int)
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: deptname (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 deptid (type: int)
- 1 deptid (type: int)
- outputColumnNames: _col0, _col1, _col2, _col6, _col7
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -248,12 +252,16 @@ STAGE PLANS:
Filter Operator
predicate: (deptid is not null and lastname is not null) (type: boolean)
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int), lastname (type: string)
- sort order: ++
- Map-reduce partition columns: deptid (type: int), lastname (type: string)
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: locid (type: int)
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col0 (type: string)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
Map 3
Map Operator Tree:
TableScan
@@ -262,35 +270,32 @@ STAGE PLANS:
Filter Operator
predicate: (deptid is not null and deptname is not null) (type: boolean)
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int), deptname (type: string)
- sort order: ++
- Map-reduce partition columns: deptid (type: int), deptname (type: string)
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 deptid (type: int), lastname (type: string)
- 1 deptid (type: int), deptname (type: string)
- outputColumnNames: _col0, _col1, _col2, _col6, _col7
+ 0 _col1 (type: int), _col0 (type: string)
+ 1 _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean)
- Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -321,12 +326,16 @@ STAGE PLANS:
Filter Operator
predicate: (deptid is not null and lastname is not null) (type: boolean)
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int), lastname (type: string)
- sort order: ++
- Map-reduce partition columns: deptid (type: int), lastname (type: string)
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: locid (type: int)
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: int), _col0 (type: string)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
Map 3
Map Operator Tree:
TableScan
@@ -335,32 +344,32 @@ STAGE PLANS:
Filter Operator
predicate: (deptid is not null and deptname is not null) (type: boolean)
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int), deptname (type: string)
- sort order: ++
- Map-reduce partition columns: deptid (type: int), deptname (type: string)
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 deptid (type: int), lastname (type: string)
- 1 deptid (type: int), deptname (type: string)
- outputColumnNames: _col0, _col1, _col2, _col6, _col7
+ 0 _col1 (type: int), _col0 (type: string)
+ 1 _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -395,12 +404,16 @@ STAGE PLANS:
Filter Operator
predicate: (deptid is not null and lastname is not null) (type: boolean)
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int), lastname (type: string), lastname (type: string)
- sort order: +++
- Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string)
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: locid (type: int)
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col0 (type: string), _col0 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col0 (type: string)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
Map 3
Map Operator Tree:
TableScan
@@ -409,35 +422,32 @@ STAGE PLANS:
Filter Operator
predicate: (deptid is not null and deptname is not null) (type: boolean)
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: deptid (type: int), deptname (type: string), deptname (type: string)
- sort order: +++
- Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string)
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string), _col1 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 deptid (type: int), lastname (type: string), lastname (type: string)
- 1 deptid (type: int), deptname (type: string), deptname (type: string)
- outputColumnNames: _col0, _col1, _col2, _col6, _col7
+ 0 _col1 (type: int), _col0 (type: string), _col0 (type: string)
+ 1 _col0 (type: int), _col1 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean)
- Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/auto_join1.q.out b/ql/src/test/results/clientpositive/spark/auto_join1.q.out
index d9215f8..d26a33e 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join1.q.out
@@ -32,15 +32,19 @@ STAGE PLANS:
Map 2
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Local Work:
Map Reduce Local Work
@@ -56,28 +60,32 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
Local Work:
Map Reduce Local Work
@@ -105,7 +113,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join10.q.out b/ql/src/test/results/clientpositive/spark/auto_join10.q.out
index cd50576..b43e55c 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join10.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join10.q.out
@@ -33,8 +33,8 @@ STAGE PLANS:
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
@@ -58,8 +58,8 @@ STAGE PLANS:
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -67,19 +67,23 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col2, _col3
+ outputColumnNames: _col0, _col1
input vertices:
1 Map 3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col2,_col3))
- mode: hash
+ Select Operator
+ expressions: hash(_col0,_col1) (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Reducer 2
diff --git a/ql/src/test/results/clientpositive/spark/auto_join11.q.out b/ql/src/test/results/clientpositive/spark/auto_join11.q.out
index ed32dec..f8fc309 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join11.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join11.q.out
@@ -30,11 +30,11 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 100) (type: boolean)
+ predicate: (UDFToDouble(key) < 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
@@ -55,11 +55,11 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 100) (type: boolean)
+ predicate: (UDFToDouble(key) < 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -67,19 +67,23 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col3
+ outputColumnNames: _col1, _col2
input vertices:
1 Map 3
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0,_col3))
- mode: hash
+ Select Operator
+ expressions: hash(_col2,_col1) (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Reducer 2
diff --git a/ql/src/test/results/clientpositive/spark/auto_join14.q.out b/ql/src/test/results/clientpositive/spark/auto_join14.q.out
index 830314e..30be2e8 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join14.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join14.q.out
@@ -29,18 +29,22 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key > 100) (type: boolean)
+ predicate: (UDFToDouble(key) > 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Local Work:
Map Reduce Local Work
@@ -48,36 +52,40 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: srcpart
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 100) and key is not null) (type: boolean)
+ predicate: ((UDFToDouble(key) > 100.0) and key is not null) (type: boolean)
Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
- input vertices:
- 0 Map 1
- Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col3
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: UDFToInteger(_col3) (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/spark/auto_join16.q.out b/ql/src/test/results/clientpositive/spark/auto_join16.q.out
index 8c166b0..5c4bbb3 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join16.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join16.q.out
@@ -24,61 +24,69 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > 10) and value is not null) and (key > 20)) and (value < 200)) (type: boolean)
- Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) > 20.0)) and key is not null) and value is not null) (type: boolean)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: string), _col1 (type: string)
- 1 key (type: string), value (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
Local Work:
Map Reduce Local Work
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
- alias: tab
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean)
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string), _col1 (type: string)
- 1 key (type: string), value (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 1
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0,_col3))
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(value) < 200.0)) and key is not null) and value is not null) (type: boolean)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col0,_col3) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/spark/auto_join17.q.out b/ql/src/test/results/clientpositive/spark/auto_join17.q.out
index 3144db6..bc492c9 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join17.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join17.q.out
@@ -28,15 +28,19 @@ STAGE PLANS:
Map 2
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Local Work:
Map Reduce Local Work
@@ -52,28 +56,32 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
@@ -101,9 +109,9 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT sum(hash(dest1.key1,dest1.value1,dest1.key2,dest1.value2)) FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join18.q.out b/ql/src/test/results/clientpositive/spark/auto_join18.q.out
index f5a2227..eaef06c 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join18.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join18.q.out
@@ -45,11 +45,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(value)
- keys: key (type: string)
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -66,11 +66,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT value)
- keys: key (type: string), value (type: string)
+ aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -103,15 +103,19 @@ STAGE PLANS:
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0,_col1,_col2,_col3))
- mode: hash
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3) (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Reducer 4
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out
index 07c7aa5..df737c2 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out
@@ -47,11 +47,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(value)
- keys: key (type: string)
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -68,11 +68,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT value), count(DISTINCT key)
- keys: key (type: string), value (type: string)
+ aggregations: count(DISTINCT _col1), count(DISTINCT _col0)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -105,15 +105,19 @@ STAGE PLANS:
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4))
- mode: hash
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3,_col4) (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Reducer 4
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/auto_join19.q.out b/ql/src/test/results/clientpositive/spark/auto_join19.q.out
index f2b0140..9e4fb8f 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join19.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join19.q.out
@@ -35,10 +35,14 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Local Work:
Map Reduce Local Work
@@ -54,28 +58,32 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col8
- input vertices:
- 1 Map 2
- Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col8 (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col4
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col4 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/spark/auto_join26.q.out b/ql/src/test/results/clientpositive/spark/auto_join26.q.out
index 58821e9..3c437a1 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join26.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join26.q.out
@@ -29,7 +29,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: x
@@ -37,20 +37,24 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Local Work:
Map Reduce Local Work
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP, 2)
+ Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: y
@@ -58,31 +62,39 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
+ Select Operator
+ expressions: key (type: string)
outputColumnNames: _col0
- input vertices:
- 0 Map 1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Local Work:
Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -128,7 +140,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Output: default@dest_j1
-POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ]
+POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ]
POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/auto_join27.q.out b/ql/src/test/results/clientpositive/spark/auto_join27.q.out
index 1722837..43313e0 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join27.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join27.q.out
@@ -41,7 +41,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 200) (type: boolean)
+ predicate: (UDFToDouble(key) < 200.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
@@ -58,25 +58,29 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 200) (type: boolean)
+ predicate: (UDFToDouble(key) < 200.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: key (type: string), value (type: string)
- mode: hash
+ Select Operator
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Map 6
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 200) (type: boolean)
+ predicate: (UDFToDouble(key) < 200.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/auto_join4.q.out b/ql/src/test/results/clientpositive/spark/auto_join4.q.out
index 3366f75..7cf582c 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join4.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join4.q.out
@@ -50,15 +50,15 @@ STAGE PLANS:
Map 2
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
@@ -76,7 +76,7 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -154,8 +154,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
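
In the auto_join4 hunks above, the small (hash-table) side now carries both range filters -- (15, 25) from its own predicate plus (10, 20) from the other side -- which is why its row estimate drops from 55 to 6. This looks like transitive predicate propagation over the equi-join key; a hedged sketch of the effect on the standard 500-row `src` table:

SELECT a.key, a.value, b.key, b.value
FROM (SELECT * FROM src WHERE key > 10 AND key < 20) a
JOIN (SELECT * FROM src WHERE key > 15 AND key < 25) b
  ON a.key = b.key;
-- With a.key = b.key, each side's range applies to the other, so the
-- effective filter on both scans is 15 < key AND key < 20.
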
diff --git a/ql/src/test/results/clientpositive/spark/auto_join5.q.out b/ql/src/test/results/clientpositive/spark/auto_join5.q.out
index b6d8798..08e9ae6 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join5.q.out
@@ -47,18 +47,18 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
@@ -70,13 +70,13 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 15) and (key < 25)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -84,16 +84,16 @@ STAGE PLANS:
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
- Right Outer Join0 to 1
+ Left Outer Join0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 0 Map 1
+ 1 Map 2
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string)
+ expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -154,8 +154,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join6.q.out b/ql/src/test/results/clientpositive/spark/auto_join6.q.out
index 2ca2674..5b7e904 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join6.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join6.q.out
@@ -54,7 +54,7 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -69,10 +69,10 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 15) and (key < 25)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -154,8 +154,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join8.q.out b/ql/src/test/results/clientpositive/spark/auto_join8.q.out
index e0df9a2..e77817a 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join8.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join8.q.out
@@ -50,15 +50,15 @@ STAGE PLANS:
Map 2
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean)
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
@@ -76,7 +76,7 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean)
+ predicate: (((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -96,7 +96,7 @@ STAGE PLANS:
predicate: _col2 is null (type: boolean)
Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), null (type: int), _col3 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -157,8 +157,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
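
The `null` to `UDFToInteger(null)` change in the auto_join8 hunk above appears to make the type of the NULL constant explicit: a bare NULL has no type, so casting it to the destination column's INT type pins down the output schema. An illustrative stand-in (the FROM/LIMIT is only there because older Hive requires a source table):

SELECT CAST(NULL AS INT) FROM src LIMIT 1;
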
diff --git a/ql/src/test/results/clientpositive/spark/auto_join9.q.out b/ql/src/test/results/clientpositive/spark/auto_join9.q.out
index 6daf348..568891b 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join9.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join9.q.out
@@ -33,10 +33,14 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Local Work:
Map Reduce Local Work
@@ -52,28 +56,32 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col8
- input vertices:
- 1 Map 2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col8 (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col4
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col4 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
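
The auto_join9 change above is the clearest instance of a pattern repeated throughout this patch: the planner now inserts a Select Operator that projects only the needed columns into an internal schema before the join, so join keys become `_col0` rather than raw table columns, and downstream references are renumbered against the smaller projected schemas (here `_col8` becomes `_col4`). A simplified stand-in for the query shape that produces such a plan (not the verbatim test query):

-- Each side is narrowed to its needed columns first; the join then matches
-- on the projected _col0 of both inputs.
SELECT a.key, b.value
FROM src a JOIN src b ON a.key = b.key;
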
diff --git a/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out b/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out
index 8934433..b6b89c1 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out
@@ -44,7 +44,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
3080335
-Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -330,7 +330,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
3078400
-Warning: Shuffle Join JOIN[10][tables = [a, b]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out
index 1f37c75..4b809ce 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out
@@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@myinput1
-Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -24,7 +24,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
13630578
-Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -34,7 +34,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
13630578
-Warning: Map Join MAPJOIN[11][bigTable=b] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
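
The warning-text churn above follows from the same projection change: the cross-product detector now runs after Select Operators have replaced the user aliases with planner-generated row schemas, so it reports `$hdt$_0`/`$hdt$_1` (or `?`, apparently when the original big-table alias can no longer be resolved) instead of `a`/`b`, and the operator IDs shift because extra operators now exist. For reference, a condition-less join like the one in this test is what triggers the warning:

EXPLAIN
SELECT sum(hash(a.key, a.value, b.key, b.value))
FROM myinput1 a JOIN myinput1 b;
-- Warning: Map Join MAPJOIN[...][bigTable=?] ... is a cross product
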
diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out
index c3d5225..72f60d0 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out
@@ -24,33 +24,41 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 4
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out b/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out
index c1f3de7..f6bfad2 100644
--- a/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out
@@ -69,6 +69,7 @@ select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@lineitem
#### A masked pattern was here ####
@@ -77,6 +78,7 @@ select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@lineitem
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
index 602de6e..665bfce 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
@@ -23,39 +23,45 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 100) (type: boolean)
+ predicate: (UDFToDouble(key) = 100.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: '100' (type: string)
- sort order: +
- Map-reduce partition columns: '100' (type: string)
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '100' (type: string)
+ sort order: +
+ Map-reduce partition columns: '100' (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 100) (type: boolean)
+ predicate: (UDFToDouble(key) = 100.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: '100' (type: string)
- sort order: +
- Map-reduce partition columns: '100' (type: string)
+ Select Operator
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: '100' (type: string)
+ sort order: +
+ Map-reduce partition columns: '100' (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: '100' (type: string), 101.0 (type: double), _col6 (type: string)
+ expressions: '100' (type: string), 101.0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -115,53 +121,57 @@ STAGE PLANS:
alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_orderkey is not null and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean)
- Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: l_orderkey (type: int), l_linenumber (type: int)
- sort order: ++
- Map-reduce partition columns: l_orderkey (type: int), l_linenumber (type: int)
- Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE
- value expressions: l_partkey (type: int), l_suppkey (type: int)
+ predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
Map 3
Map Operator Tree:
TableScan
- alias: lineitem
+ alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean)
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), 1 (type: int)
+ expressions: l_orderkey (type: int), l_linenumber (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Left Semi Join 0 to 1
keys:
- 0 l_orderkey (type: int), l_linenumber (type: int)
+ 0 _col0 (type: int), _col3 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 13 Data size: 1582 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1582 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 1582 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
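
The constprog_partitioner hunks show constant propagation interacting with the new projections: once the filter pins `key` to a single value, the shuffle keys on both sides are the literal `'100'`, and the final projection emits the constants `'100'` and `101.0` directly, leaving only `value` to be carried through the join. A hedged guess at the query shape behind the first plan:

SELECT src1.key, src1.key + 1, src2.value
FROM src src1 JOIN src src2 ON src1.key = src2.key
WHERE src1.key = 100;
-- key + 1 on the STRING column coerces to DOUBLE, hence the constant 101.0.
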
diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out
index cb9eda5..b2e9ffb 100644
--- a/ql/src/test/results/clientpositive/spark/count.q.out
+++ b/ql/src/test/results/clientpositive/spark/count.q.out
@@ -53,11 +53,11 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int), b (type: int), c (type: int), d (type: int)
- outputColumnNames: a, b, c, d
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
- keys: a (type: int), b (type: int), c (type: int)
+ aggregations: count(DISTINCT _col1), count(DISTINCT _col2), sum(_col3)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
@@ -188,14 +188,14 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int), b (type: int), c (type: int), d (type: int)
- outputColumnNames: a, b, c, d
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: a (type: int), b (type: int), c (type: int)
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
sort order: +++
- Map-reduce partition columns: a (type: int)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
- value expressions: d (type: int)
+ value expressions: _col3 (type: int)
Reducer 2
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/cross_join.q.out b/ql/src/test/results/clientpositive/spark/cross_join.q.out
index bb25e21..2fb6b21 100644
--- a/ql/src/test/results/clientpositive/spark/cross_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/cross_join.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- current
explain select src.key from src join src src2
PREHOOK: type: QUERY
@@ -21,18 +21,24 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -57,7 +63,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- ansi cross join
explain select src.key from src cross join src src2
PREHOOK: type: QUERY
@@ -80,18 +86,24 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -141,32 +153,40 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
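
The plans above confirm that the implicit form (JOIN with no condition) and the ANSI spelling (CROSS JOIN) compile to the same shuffle plan and raise the same cross-product warning; the third plan, presumably from an equality join, is the shape that avoids it:

EXPLAIN SELECT src.key FROM src JOIN src src2;                        -- warns
EXPLAIN SELECT src.key FROM src CROSS JOIN src src2;                  -- warns
EXPLAIN SELECT src.key FROM src JOIN src src2 ON src.key = src2.key;  -- no warning
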
diff --git a/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out b/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
index ff58910..c228f0b 100644
--- a/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
@@ -28,7 +28,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@B
-Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select * from A join B
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from A join B
@@ -49,19 +49,27 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string), value (type: string)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string), value (type: string)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -70,19 +78,15 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1, _col5, _col6
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
index 6c4e659..ab062ad 100644
--- a/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
@@ -28,7 +28,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@B
-Warning: Map Join MAPJOIN[7][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain select * from A join B
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from A join B
@@ -48,10 +48,14 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0
- 1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
Local Work:
Map Reduce Local Work
@@ -64,19 +68,19 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
diff --git a/ql/src/test/results/clientpositive/spark/groupby1.q.out b/ql/src/test/results/clientpositive/spark/groupby1.q.out
index f661a62..8f60691 100644
--- a/ql/src/test/results/clientpositive/spark/groupby1.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby1.q.out
@@ -35,15 +35,15 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ expressions: key (type: string), substr(value, 5) (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: key (type: string)
+ key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: rand() (type: double)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: substr(value, 5) (type: string)
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Group By Operator
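
The groupby1 hunk illustrates another recurring change: expression evaluation moves out of the Reduce Output Operator and into the map-side Select Operator, so `substr(value, 5)` is computed once, named `_col1`, and shuffled by reference. A simplified stand-in for the test query (a two-stage aggregation, hence the `rand()` partition columns):

SELECT src.key, sum(substr(src.value, 5))
FROM src
GROUP BY src.key;
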
diff --git a/ql/src/test/results/clientpositive/spark/groupby4.q.out b/ql/src/test/results/clientpositive/spark/groupby4.q.out
index 87d2968..b764466 100644
--- a/ql/src/test/results/clientpositive/spark/groupby4.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby4.q.out
@@ -37,11 +37,11 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: substr(key, 1, 1) (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: substr(key, 1, 1) (type: string)
+ key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: rand() (type: double)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
index 9fe3b72..65bdf2c 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
@@ -20,13 +20,13 @@ POSTHOOK: Output: database:default
POSTHOOK: Output: default@DEST2
PREHOOK: query: EXPLAIN
FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -41,8 +41,8 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 6 (GROUP, 2)
Reducer 4 <- Map 7 (GROUP, 2)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+ Reducer 5 <- Reducer 4 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 6
@@ -95,19 +95,17 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: array), _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: array)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: array), VALUE._col1 (type: bigint)
+ expressions: KEY.reducesinkkey0 (type: array), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -127,19 +125,17 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: map), _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: map)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reducer 5
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: map), VALUE._col1 (type: bigint)
+ expressions: KEY.reducesinkkey0 (type: map), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -179,15 +175,15 @@ STAGE PLANS:
Stats-Aggr Operator
PREHOOK: query: FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@dest1
PREHOOK: Output: default@dest2
POSTHOOK: query: FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
@@ -204,16 +200,16 @@ POSTHOOK: query: SELECT DEST1.* FROM DEST1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-["118"] 2
-["180"] 1
-["201"] 1
-["202"] 1
-["238"] 2
-["273"] 3
-["282"] 2
-["419"] 1
-["432"] 1
-["467"] 1
+["0"] 3
+["10"] 1
+["100"] 2
+["103"] 2
+["104"] 2
+["105"] 1
+["11"] 1
+["111"] 1
+["113"] 2
+["114"] 1
PREHOOK: query: SELECT DEST2.* FROM DEST2
PREHOOK: type: QUERY
PREHOOK: Input: default@dest2
@@ -223,12 +219,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest2
#### A masked pattern was here ####
{"0":"val_0"} 3
-{"138":"val_138"} 4
-{"170":"val_170"} 1
-{"19":"val_19"} 1
-{"222":"val_222"} 1
-{"223":"val_223"} 2
-{"226":"val_226"} 1
-{"489":"val_489"} 4
-{"8":"val_8"} 1
-{"80":"val_80"} 1
+{"10":"val_10"} 1
+{"100":"val_100"} 2
+{"103":"val_103"} 2
+{"104":"val_104"} 2
+{"105":"val_105"} 1
+{"11":"val_11"} 1
+{"111":"val_111"} 1
+{"113":"val_113"} 2
+{"114":"val_114"} 1
diff --git a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
index 42513a4..21c88b1 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
@@ -42,11 +42,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), val (type: string), '0' (type: string)
+ keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE
@@ -133,11 +133,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), val (type: string), '0' (type: string)
+ keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE
@@ -223,11 +223,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT val)
- keys: key (type: string), '0' (type: string), val (type: string)
+ aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), '0' (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
@@ -302,11 +302,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), val (type: string), '0' (type: string)
+ keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE
@@ -407,11 +407,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT val)
- keys: key (type: string), '0' (type: string), val (type: string)
+ aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), '0' (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out
index 16437d5..50d707d 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out
@@ -436,20 +436,24 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <= 20) (type: boolean)
+ predicate: (UDFToDouble(key) <= 20.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -548,7 +552,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (GROUP, 2)
Reducer 6 <- Map 5 (GROUP, 2)
Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
Reducer 4 <- Reducer 3 (SORT, 1)
@@ -560,55 +564,57 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean)
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(DISTINCT substr(value, 5))
- keys: key (type: string), value (type: string), substr(value, 5) (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Map 5
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean)
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: key (type: string), value (type: string)
- mode: hash
+ predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Group By Operator
- aggregations: count(DISTINCT KEY._col2:0._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 3
Reduce Operator Tree:
Join Operator
@@ -617,25 +623,21 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
- sort order: --++
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ sort order: --++
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
Reducer 4
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -646,12 +648,12 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Stage: Stage-0
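
Two recurring plan changes run through the hunk above and most of the hunks that follow: comparisons between string columns and numeric literals are now printed with their implicit casts (UDFToDouble(key) > 10.0 instead of key > 10), and range predicates are propagated across the equi-join so each scan carries both sides' bounds. A minimal sketch of a query shape that would produce the merged predicates seen here; this is illustrative, not the actual .q test, and assumes the standard src(key string, value string) test table:

  SELECT a.key, a.value
  FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
  WHERE a.key > 10 AND a.key < 20   -- inferred for b through a.key = b.key
    AND b.key > 15 AND b.key < 25;  -- inferred for a the same way

Because key is a string compared against integer literals, both operands are promoted to double, which is exactly what the UDFToDouble casts in the golden output record.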
diff --git a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
index 6f4854c..92b6d19 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
@@ -42,11 +42,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), val (type: string), '0' (type: string)
+ keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE
@@ -127,11 +127,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT val)
- keys: key (type: string), '0' (type: string), val (type: string)
+ aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), '0' (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
@@ -206,11 +206,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), val (type: string), '0' (type: string)
+ keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE
@@ -305,11 +305,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT val)
- keys: key (type: string), '0' (type: string), val (type: string)
+ aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), '0' (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
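
In the rollup plans above, the map-side Select Operator now emits internal aliases (_col0, _col1) in place of the raw column names, and the constant '0' in the key list is the grouping-set identifier that ROLLUP appends. Reconstructed from the expressions shown (the plans reference a table with string columns key and val, so something like T1(key string, val string)), the two query shapes are roughly:

  SELECT key, val, count(1) FROM T1 GROUP BY key, val WITH ROLLUP;

  SELECT key, count(DISTINCT val) FROM T1 GROUP BY key WITH ROLLUP;

The second shape explains why the grouping-set key '0' sits between the group key and the distinct column in its key list, consistent with distinct columns being appended after the grouping keys.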
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
index 65a8374..6c0f4a5 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
@@ -97,11 +97,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -308,11 +308,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), val (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -980,11 +980,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: 1 (type: int), key (type: string)
+ keys: 1 (type: int), _col1 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1195,11 +1195,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), 1 (type: int), val (type: string)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -1421,12 +1421,12 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), (key + 1) (type: double)
+ keys: _col0 (type: string), _col1 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -1685,28 +1685,32 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col1)
- keys: (_col0 + _col0) (type: double)
- mode: hash
+ Select Operator
+ expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1958,11 +1962,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2062,11 +2066,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2330,11 +2334,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2437,12 +2441,12 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: (key + key) (type: double)
+ keys: _col0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -3386,12 +3390,12 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
bucketGroup: true
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -3620,11 +3624,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), 1 (type: int), val (type: string)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -3850,11 +3854,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int)
mode: final
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
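
Several hunks in groupby_sort_1_23 show computed grouping keys being evaluated in an explicit Select Operator ahead of the Group By, rather than inside its key list. A sketch of the pattern, again assuming a T1(key string, val string) test table:

  SELECT key + key, count(1)
  FROM T1
  GROUP BY key + key;

Since key is a string, key + key is really UDFToDouble(key) + UDFToDouble(key) (type double); the new plans materialize that expression once as _col0 in the Select Operator and group on the alias, instead of recomputing the arithmetic inside the Group By Operator.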
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
index 9f30e15..8248a70 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
@@ -97,11 +97,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -309,11 +309,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), val (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -998,11 +998,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: 1 (type: int), key (type: string)
+ keys: 1 (type: int), _col1 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -1214,11 +1214,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), 1 (type: int), val (type: string)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -1458,12 +1458,12 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), (key + 1) (type: double)
+ keys: _col0 (type: string), _col1 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -1740,28 +1740,32 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col1)
- keys: (_col0 + _col0) (type: double)
- mode: hash
+ Select Operator
+ expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: rand() (type: double)
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -2030,11 +2034,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2134,11 +2138,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2403,11 +2407,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -2510,12 +2514,12 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: (key + key) (type: double)
+ keys: _col0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -3495,12 +3499,12 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
bucketGroup: true
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -3746,11 +3750,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), 1 (type: int), val (type: string)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -3976,11 +3980,11 @@ STAGE PLANS:
GatherStats: false
Select Operator
expressions: key (type: string), val (type: string)
- outputColumnNames: key, val
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int)
mode: final
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
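
groupby_sort_skew_1_23 mirrors the previous file, with one visible difference: in the relocated Reduce Output Operator the Map-reduce partition columns are rand() rather than the group key. That is the two-stage skewed aggregation Hive applies when hive.groupby.skewindata is enabled (the property name is the real Hive setting; the query is illustrative):

  SET hive.groupby.skewindata=true;
  SELECT key, count(1) FROM T1 GROUP BY key;

Rows are first spread randomly across reducers for partial aggregation and then re-aggregated by key in a second stage, so a single hot key cannot overwhelm one reducer.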
diff --git a/ql/src/test/results/clientpositive/spark/having.q.out b/ql/src/test/results/clientpositive/spark/having.q.out
index da6186e..245b27a 100644
--- a/ql/src/test/results/clientpositive/spark/having.q.out
+++ b/ql/src/test/results/clientpositive/spark/having.q.out
@@ -104,20 +104,24 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <> 302) (type: boolean)
+ predicate: (UDFToDouble(key) <> 302.0) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: max(value)
- keys: key (type: string)
- mode: hash
+ Select Operator
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: max(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -478,11 +482,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(value)
- keys: key (type: string)
+ aggregations: max(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -749,20 +753,24 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key > 300) (type: boolean)
+ predicate: (UDFToDouble(key) > 300.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: max(value)
- keys: key (type: string)
- mode: hash
+ Select Operator
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: max(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -947,11 +955,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(value)
- keys: key (type: string)
+ aggregations: max(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1215,11 +1223,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(value)
- keys: key (type: string)
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
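
The having.q hunks show a HAVING predicate on the grouping key being pushed below the aggregation as a plain Filter Operator, again with the string/number comparison made explicit. The first plan corresponds to a query of roughly this shape, reconstructed from the predicate and aggregation shown:

  SELECT key, max(value)
  FROM src
  GROUP BY key
  HAVING key <> 302;

Because key <> 302 references only the group key, it is safe to evaluate before grouping, and since key is a string it surfaces as UDFToDouble(key) <> 302.0 in the golden output.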
diff --git a/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out b/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out
index 57942af..c965455 100644
--- a/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out
@@ -25,45 +25,53 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: value (type: string)
- sort order: +
- Map-reduce partition columns: value (type: string)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
+ predicate: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and value is not null) (type: boolean)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: value (type: string)
- sort order: +
- Map-reduce partition columns: value (type: string)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
+ predicate: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and value is not null) (type: boolean)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 value (type: string)
- 1 value (type: string)
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col5 (type: string)
+ expressions: _col0 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -133,49 +141,57 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- filterExpr: ((value is not null and (key > 80)) and (key < 100)) (type: boolean)
+ filterExpr: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and value is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: value (type: string)
- sort order: +
- Map-reduce partition columns: value (type: string)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
+ predicate: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and value is not null) (type: boolean)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
- filterExpr: ((value is not null and (key > 70)) and (key < 90)) (type: boolean)
+ alias: a
+ filterExpr: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and value is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: value (type: string)
- sort order: +
- Map-reduce partition columns: value (type: string)
- Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
+ predicate: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and value is not null) (type: boolean)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 value (type: string)
- 1 value (type: string)
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col5 (type: string)
+ expressions: _col0 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
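
The filterExpr lines in index_auto_self_join spell out the underlying test: a self-join of src on value with a different key range on each side, so the query should be close to:

  SELECT a.key, b.key
  FROM src a JOIN src b ON a.value = b.value
  WHERE a.key > 80 AND a.key < 100
    AND b.key > 70 AND b.key < 90;

Both scans now also carry an explicit value is not null conjunct, which is sound because a NULL value can never satisfy the inner-join condition.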
diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out
index 4da17fb..94adb3d 100644
--- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out
+++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out
@@ -79,11 +79,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
@@ -164,6 +164,7 @@ Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
+WARNING: Comparing a bigint and a string may result in a loss of precision.
PREHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value
EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
SELECT a.key, a.value FROM (
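
The added WARNING line is Hive's standard diagnostic for comparing a bigint with a string; it presumably fires here because the rewritten plan compares an aggregated bigint against a string column (the test query itself is truncated above). A minimal hypothetical trigger, not the actual test:

  SELECT *
  FROM (SELECT key, count(1) AS cnt FROM src GROUP BY key) a
  JOIN src b ON a.cnt = b.key;  -- bigint compared to string

Both operands are promoted to double for the comparison, which can silently lose precision for large bigint values, hence the warning.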
diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out
index 0f352d7..d947eb5 100644
--- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out
+++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out
@@ -27,7 +27,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@test_table@part=1
POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table
@@ -82,7 +82,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@test_table@part=1
POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table
diff --git a/ql/src/test/results/clientpositive/spark/innerjoin.q.out b/ql/src/test/results/clientpositive/spark/innerjoin.q.out
index 3f6f9ab..762ddad 100644
--- a/ql/src/test/results/clientpositive/spark/innerjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/innerjoin.q.out
@@ -38,37 +38,45 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+ expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -104,7 +112,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest_j1.* FROM dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
@@ -1214,12 +1222,16 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -1227,7 +1239,7 @@ STAGE PLANS:
Left Outer Join0 to 1
keys:
0 _col0 (type: string)
- 1 key (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
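
The innerjoin hunks illustrate the other recurring change in this patch: each join input now passes through its own Select Operator, and a side whose non-key columns are unused ships only the join key (Map 3's Reduce Output Operator carries no value expressions at all). A generic sketch of the pruning, against the standard src table:

  -- Only the join key is shuffled for src2; src1 ships key and value.
  SELECT src1.key, src1.value
  FROM src src1 JOIN src src2 ON src1.key = src2.key;

The flipped Lineage entries below the plans (value traced to src1 instead of src2) follow from the same rewrite: the surviving value column is taken from the side that still carries it.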
diff --git a/ql/src/test/results/clientpositive/spark/join1.q.out b/ql/src/test/results/clientpositive/spark/join1.q.out
index 145bbe4..4714f11 100644
--- a/ql/src/test/results/clientpositive/spark/join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/join1.q.out
@@ -38,37 +38,45 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+ expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -104,7 +112,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest_j1.* FROM dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
diff --git a/ql/src/test/results/clientpositive/spark/join10.q.out b/ql/src/test/results/clientpositive/spark/join10.q.out
index e7dd570..8d18683 100644
--- a/ql/src/test/results/clientpositive/spark/join10.q.out
+++ b/ql/src/test/results/clientpositive/spark/join10.q.out
@@ -36,14 +36,15 @@ STAGE PLANS:
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
@@ -53,15 +54,14 @@ STAGE PLANS:
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -70,19 +70,15 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col2, _col3
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/join11.q.out b/ql/src/test/results/clientpositive/spark/join11.q.out
index 2b5eb3a..7d59d0c 100644
--- a/ql/src/test/results/clientpositive/spark/join11.q.out
+++ b/ql/src/test/results/clientpositive/spark/join11.q.out
@@ -35,35 +35,35 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 100) (type: boolean)
+ predicate: (UDFToDouble(key) < 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 100) (type: boolean)
+ predicate: (UDFToDouble(key) < 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -72,10 +72,10 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col3
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
+ expressions: _col2 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/join14.q.out b/ql/src/test/results/clientpositive/spark/join14.q.out
index 336787b..e942087 100644
--- a/ql/src/test/results/clientpositive/spark/join14.q.out
+++ b/ql/src/test/results/clientpositive/spark/join14.q.out
@@ -35,42 +35,50 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key > 100) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Map 3
- Map Operator Tree:
- TableScan
alias: srcpart
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 100) and key is not null) (type: boolean)
+ predicate: ((UDFToDouble(key) > 100.0) and key is not null) (type: boolean)
Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) > 100.0) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col3
Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+ expressions: UDFToInteger(_col3) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/join16.q.out b/ql/src/test/results/clientpositive/spark/join16.q.out
index 000ea08..aecd953 100644
--- a/ql/src/test/results/clientpositive/spark/join16.q.out
+++ b/ql/src/test/results/clientpositive/spark/join16.q.out
@@ -19,30 +19,34 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > 10) and value is not null) and (key > 20)) and (value < 200)) (type: boolean)
- Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(value) < 200.0)) and key is not null) and value is not null) (type: boolean)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
- alias: tab
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean)
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
- Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) > 20.0)) and key is not null) and value is not null) (type: boolean)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -50,16 +54,16 @@ STAGE PLANS:
Inner Join 0 to 1
keys:
0 _col0 (type: string), _col1 (type: string)
- 1 key (type: string), value (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col3
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/join17.q.out b/ql/src/test/results/clientpositive/spark/join17.q.out
index 4aa2a05..4aefeb3 100644
--- a/ql/src/test/results/clientpositive/spark/join17.q.out
+++ b/ql/src/test/results/clientpositive/spark/join17.q.out
@@ -78,14 +78,18 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -138,21 +142,25 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -201,7 +209,7 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [src2]
+ /src [src1]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
@@ -209,12 +217,12 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -281,9 +289,9 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest1.* FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
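NOTE: join17.q.out shows the other two recurring changes. Column pruning now inserts a Select Operator in front of each Reduce Output Operator, so the shuffle keys and value expressions are written over internal names (_col0, _col1) instead of raw table columns, and the join's outputColumnNames are renumbered to match. Both scans of the self-join are also canonicalized under the first alias, so src2 disappears from the plan, from Truncated Path -> Alias, and from the POSTHOOK lineage strings. Judging from the lineage section, the query under test is roughly:

    from src src1 join src src2 on (src1.key = src2.key)
    insert overwrite table dest1
    select src1.*, src2.*;
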
diff --git a/ql/src/test/results/clientpositive/spark/join18.q.out b/ql/src/test/results/clientpositive/spark/join18.q.out
index 3d266ad..ed17bab 100644
--- a/ql/src/test/results/clientpositive/spark/join18.q.out
+++ b/ql/src/test/results/clientpositive/spark/join18.q.out
@@ -48,11 +48,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(value)
- keys: key (type: string)
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -69,11 +69,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT value)
- keys: key (type: string), value (type: string)
+ aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
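NOTE: in join18.q.out the same pruning reaches the aggregation side: the map-side Group By now consumes the projected internal columns (count(_col1) keyed by _col0) instead of key/value directly. The hunks suggest a join of two aggregated subqueries, a plain count over the 500-row src and a count(DISTINCT) over the 25-row src1; the join type is not visible in these hunks, but the shape is roughly:

    select a.key, a.cnt, b.key, b.cnt
    from (select key, count(value) as cnt from src group by key) a
    join (select key, count(distinct value) as cnt from src1 group by key) b
    on a.key = b.key;

join18_multi_distinct.q.out, next, changes identically with a second DISTINCT aggregate.
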
diff --git a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out
index 5560226..18d79d3 100644
--- a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out
@@ -50,11 +50,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(value)
- keys: key (type: string)
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -71,11 +71,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(DISTINCT value), count(DISTINCT key)
- keys: key (type: string), value (type: string)
+ aggregations: count(DISTINCT _col1), count(DISTINCT _col0)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/join29.q.out b/ql/src/test/results/clientpositive/spark/join29.q.out
index 0b4284c..9479dd1 100644
--- a/ql/src/test/results/clientpositive/spark/join29.q.out
+++ b/ql/src/test/results/clientpositive/spark/join29.q.out
@@ -36,10 +36,10 @@ STAGE PLANS:
Stage: Stage-3
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: x
@@ -47,19 +47,23 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 2
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 4
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
@@ -77,10 +81,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
alias: y
@@ -88,19 +92,23 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
@@ -116,12 +124,12 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
+ outputColumnNames: _col1, _col2, _col3
input vertices:
- 0 Reducer 2
+ 1 Reducer 4
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int)
+ expressions: _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/join31.q.out b/ql/src/test/results/clientpositive/spark/join31.q.out
index a52a8b6..108a1ea 100644
--- a/ql/src/test/results/clientpositive/spark/join31.q.out
+++ b/ql/src/test/results/clientpositive/spark/join31.q.out
@@ -38,10 +38,10 @@ STAGE PLANS:
Stage: Stage-3
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 5 <- Map 4 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 4
Map Operator Tree:
TableScan
alias: x
@@ -49,45 +49,42 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 2
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reducer 5
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Stage: Stage-1
Spark
Edges:
- Reducer 4 <- Map 3 (GROUP, 2)
- Reducer 5 <- Reducer 4 (GROUP, 2)
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
alias: y
@@ -95,41 +92,42 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Reducer 5
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
outputColumnNames: _col0
- input vertices:
- 0 Reducer 2
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
@@ -143,7 +141,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
- Reducer 5
+ Reducer 3
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -195,7 +193,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Output: default@dest_j1
-POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ]
+POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ]
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
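NOTE: join31.q.out also demonstrates unused-aggregate elimination. Only the join keys feed the outer count, so both inner count(1) aggregates are pruned: the map-side Group By keeps just its key, the reducers drop aggregations: count(VALUE._col0), and the lineage for dest_j1.cnt shrinks from [(src1)x.null, (src)y.null, ] to [(src1)x.null, ]. In effect the inner subqueries degrade to distinct-key scans, roughly:

    select subq1.key, count(1) as cnt
    from (select distinct x.key from src1 x) subq1
    join (select distinct y.key from src y) subq2 on subq1.key = subq2.key
    group by subq1.key;
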
diff --git a/ql/src/test/results/clientpositive/spark/join34.q.out b/ql/src/test/results/clientpositive/spark/join34.q.out
index 8a1ba8d..09b8b6b 100644
--- a/ql/src/test/results/clientpositive/spark/join34.q.out
+++ b/ql/src/test/results/clientpositive/spark/join34.q.out
@@ -158,7 +158,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key < 20) (type: boolean)
+ predicate: (UDFToDouble(key) < 20.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -224,12 +224,12 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: x1
+ alias: x
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key > 100) (type: boolean)
+ predicate: (UDFToDouble(key) > 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -291,7 +291,7 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [x1]
+ /src [x]
Map 4
Map Operator Tree:
TableScan
@@ -300,16 +300,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -367,7 +371,7 @@ STAGE PLANS:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 key (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col1, _col2, _col3
Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -452,7 +456,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
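NOTE: join34.q.out shows disjunctive predicate pushdown. The two union branches filter the same scan with key < 20 and key > 100; those ranges are now OR-combined and pushed onto the src1 side of the join as ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null, cutting that scan's estimate from 13 to 8 rows. The branch aliases also collapse to x, which rewrites the dest_j1.val2 lineage. The query shape, approximately:

    from (select x.key, x.value from src x where x.key < 20
          union all
          select x1.key, x1.value from src x1 where x1.key > 100) subq1
    join src1 x on (x.key = subq1.key)
    insert overwrite table dest_j1 select x.key, x.value, subq1.value;

join35.q.out, below, changes the same way with count(1) group-bys inside the union branches.
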
diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out
index 1ed9c90..e84c860 100644
--- a/ql/src/test/results/clientpositive/spark/join35.q.out
+++ b/ql/src/test/results/clientpositive/spark/join35.q.out
@@ -168,22 +168,26 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key < 20) (type: boolean)
+ predicate: (UDFToDouble(key) < 20.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -236,27 +240,31 @@ STAGE PLANS:
Map 4
Map Operator Tree:
TableScan
- alias: x1
+ alias: x
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key > 100) (type: boolean)
+ predicate: (UDFToDouble(key) > 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -305,7 +313,7 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [x1]
+ /src [x]
Map 6
Map Operator Tree:
TableScan
@@ -314,16 +322,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -398,7 +410,7 @@ STAGE PLANS:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 key (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col1, _col2, _col3
Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -500,7 +512,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x1.null, ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x.null, ]
POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/join4.q.out b/ql/src/test/results/clientpositive/spark/join4.q.out
index eddcf07..55b3a18 100644
--- a/ql/src/test/results/clientpositive/spark/join4.q.out
+++ b/ql/src/test/results/clientpositive/spark/join4.q.out
@@ -58,7 +58,7 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -73,20 +73,20 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
@@ -158,8 +158,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest1.* FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
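NOTE: join4.q.out shows the range-intersection effect of the new transitive inference: the second scan now carries both its own (15, 25) range and the first subquery's (10, 20) range, so its estimate falls from 55 to 6 rows.

    -- the effective filter on the second branch is the intersection:
    --   key > 15 and key < 25 and key > 10 and key < 20   =>   15 < key < 20

join4, join5 and join8 exercise outer-join variants of this two-subquery pattern; propagating the preserved input's range onto the nullable side is safe there, since nullable-side rows outside the range could never match anything.
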
diff --git a/ql/src/test/results/clientpositive/spark/join40.q.out b/ql/src/test/results/clientpositive/spark/join40.q.out
index 08695ff..02c11cd 100644
--- a/ql/src/test/results/clientpositive/spark/join40.q.out
+++ b/ql/src/test/results/clientpositive/spark/join40.q.out
@@ -22,10 +22,25 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: src
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: x
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key <= 100) (type: boolean)
+ predicate: (UDFToDouble(key) <= 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -37,38 +52,23 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Map 3
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
- 0 key (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -677,37 +677,45 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col6 (type: string)
+ expressions: _col2 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -3762,32 +3770,40 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
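NOTE: join40.q.out touches several queries in one file: the left-outer example swaps which scan becomes Map 1, drops a now-redundant final Select (the join already emits _col0.._col3 in output order), and the later hunks apply the usual pruned projections, including key-only ones where value is never referenced. The last hunk corresponds to a pure key join under an aggregate, roughly:

    select count(1) from src a join src b on a.key = b.key;
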
diff --git a/ql/src/test/results/clientpositive/spark/join5.q.out b/ql/src/test/results/clientpositive/spark/join5.q.out
index d645e89..1914ad0 100644
--- a/ql/src/test/results/clientpositive/spark/join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/join5.q.out
@@ -58,7 +58,7 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -73,33 +73,33 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Left Outer Join0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string)
+ expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -158,8 +158,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest1.* FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
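NOTE: join5.q.out normalizes the join direction: the Right Outer Join is rewritten as a Left Outer Join with its inputs swapped, which is why the (15, 25) range moves to Map 1, the other scan now carries the intersected range, and the final Select permutes the columns back (_col2, _col3, _col0, _col1). Conceptually:

    -- equivalent up to column order; the plan now uses the second form:
    select a.key, b.key from src a right outer join src b on a.key = b.key;
    select a.key, b.key from src b left outer join src a on a.key = b.key;

join6.q.out, next, is the same two-subquery pattern with only the cast rendering and alias changes.
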
diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out b/ql/src/test/results/clientpositive/spark/join6.q.out
index 4770770..5f2a85a 100644
--- a/ql/src/test/results/clientpositive/spark/join6.q.out
+++ b/ql/src/test/results/clientpositive/spark/join6.q.out
@@ -58,7 +58,7 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -73,10 +73,10 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 15) and (key < 25)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -158,8 +158,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest1.* FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/join8.q.out b/ql/src/test/results/clientpositive/spark/join8.q.out
index d242c10..e3fac82 100644
--- a/ql/src/test/results/clientpositive/spark/join8.q.out
+++ b/ql/src/test/results/clientpositive/spark/join8.q.out
@@ -58,7 +58,7 @@ STAGE PLANS:
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean)
+ predicate: (((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -73,20 +73,20 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean)
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
@@ -102,7 +102,7 @@ STAGE PLANS:
predicate: _col2 is null (type: boolean)
Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), null (type: int), _col3 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -161,8 +161,8 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest1.* FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
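NOTE: join8.q.out adds two details: the range conjunction absorbs the old key is not null term (the pushed-down comparisons already reject NULLs), and the NULL literal that join8 projects into the INT column c3 for unmatched rows is now explicitly typed:

    -- before: null (type: int); after:
    --   UDFToInteger(null) (type: int)   -- i.e. cast(null as int), same behavior

join9.q.out, next, repeats the key-only and key+value projection pattern in front of the shuffles.
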
diff --git a/ql/src/test/results/clientpositive/spark/join9.q.out b/ql/src/test/results/clientpositive/spark/join9.q.out
index 92f0237..c7440da 100644
--- a/ql/src/test/results/clientpositive/spark/join9.q.out
+++ b/ql/src/test/results/clientpositive/spark/join9.q.out
@@ -94,13 +94,17 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -162,14 +166,18 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -226,12 +234,12 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col8
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col4
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col8 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col4 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
index dabdcb8..f34153d 100644
--- a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[4][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select p1.p_name, p2.p_name
from part p1 , part p2
PREHOOK: type: QUERY
@@ -21,19 +21,27 @@ STAGE PLANS:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_name (type: string)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: p2
+ alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_name (type: string)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -42,19 +50,15 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col1, _col13
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col13 (type: string)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
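NOTE: in join_alt_syntax.q.out only the warning header changes: the cross-product check now fires on the rewritten join (operator id JOIN[7]), whose inputs are the inserted Select Operators, so it reports the compiler's derived aliases $hdt$_0/$hdt$_1 rather than the user aliases p1/p2. The warning, and the cross product itself, remain.
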
diff --git a/ql/src/test/results/clientpositive/spark/join_vc.q.out b/ql/src/test/results/clientpositive/spark/join_vc.q.out
index 8b25d4f..27265a6 100644
--- a/ql/src/test/results/clientpositive/spark/join_vc.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_vc.q.out
@@ -157,39 +157,47 @@ STAGE PLANS:
alias: t1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 100) (type: boolean)
+ predicate: (UDFToDouble(key) < 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Map 4
Map Operator Tree:
TableScan
- alias: t2
+ alias: t1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 100) (type: boolean)
+ predicate: (UDFToDouble(key) < 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: BLOCK__OFFSET__INSIDE__FILE (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col7
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col7 (type: bigint)
+ expressions: _col1 (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
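NOTE: join_vc.q.out confirms that virtual columns flow through the new projections: BLOCK__OFFSET__INSIDE__FILE is now carried as an ordinary projected column (_col1) on the side that consumes it, and the alias swap moves it from the t2-labeled scan to Map 1. Reconstructed loosely from the plan, the query shape is:

    select t2.BLOCK__OFFSET__INSIDE__FILE
    from src t1 join src t2 on t1.key = t2.key
    where t1.key < 100;

(both scans end up filtered to 166 rows, so the range may sit on either alias in the source query).
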
diff --git a/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out b/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out
index e95d2ab..c6f9039 100644
--- a/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out
+++ b/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out
@@ -555,10 +555,10 @@ STAGE PLANS:
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: string)
- outputColumnNames: hr
+ outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: hr (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
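NOTE: limit_partition_metadataonly.q.out is the minimal form of the projection change, applied to a metadata-only scan of the srcpart partition column, roughly:

    select distinct hr from srcpart;

The limit_pushdown.q.out diff that follows combines several earlier patterns: the implicit cast is folded into the projected expression ((UDFToDouble(key) + 1.0)) so the Group By aggregates a precomputed column, key-only projections feed the hash-side Group Bys, and the self-join of two top-N aggregates is re-planned so the LIMIT is materialized in its own reducer (the new Reducer 3) before the join, with vertex numbers and row estimates shifting to match.
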
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
index 1efa9e7..7101beb 100644
--- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
@@ -195,12 +195,12 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum((key + 1))
- keys: value (type: string)
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -289,12 +289,12 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg((key + 1))
- keys: value (type: string)
+ aggregations: avg(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -384,10 +384,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdouble (type: double)
- outputColumnNames: cdouble
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: cdouble (type: double)
+ keys: _col0 (type: double)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -564,10 +564,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double)
- outputColumnNames: ctinyint, cdouble
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: ctinyint (type: tinyint), cdouble (type: double)
+ keys: _col0 (type: tinyint), _col1 (type: double)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -780,11 +780,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), key (type: string)
- outputColumnNames: value, key
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(key)
- keys: value (type: string)
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -883,10 +883,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 5 <- Map 4 (GROUP, 2)
- Reducer 6 <- Reducer 5 (GROUP, 1)
- Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -896,11 +896,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -911,18 +911,18 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: bigint)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -942,18 +942,32 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
- Number of rows: 2
- Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Number of rows: 3
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col0 (type: string), _col1 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
- Reducer 3
+ Reducer 4
Reduce Operator Tree:
Join Operator
condition map:
@@ -961,19 +975,23 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 4
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Limit
+ Number of rows: 4
Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -982,30 +1000,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
- Number of rows: 3
- Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.3
- value expressions: _col0 (type: string), _col1 (type: bigint)
- Reducer 6
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 3
- Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Number of rows: 2
+ Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Stage: Stage-0
@@ -1039,16 +1043,16 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: value (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: value (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- value expressions: key (type: string)
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -1293,15 +1297,15 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ expressions: concat(key, value, value, value, value, value, value, value, value, value) (type: string), key (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: concat(key, value, value, value, value, value, value, value, value, value) (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: concat(key, value, value, value, value, value, value, value, value, value) (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
index 3285ad9..4480310 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 20) (type: boolean)
+ predicate: (UDFToDouble(key) < 20.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string), '22' (type: string)
@@ -92,7 +92,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 20) and (key < 40)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 20.0) and (UDFToDouble(key) < 40.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string), '33' (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out
index 78cd4bc..190485f 100644
--- a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out
@@ -118,29 +118,36 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -148,11 +155,13 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -162,55 +171,29 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.srcpart
+ name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -249,14 +232,39 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -264,13 +272,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -280,41 +286,43 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
- Left Outer Join0 to 1
+ Right Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean)
+ predicate: ((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) (type: boolean)
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -504,16 +512,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string), _col2 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -714,16 +726,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -785,16 +801,16 @@ STAGE PLANS:
0 {(VALUE._col1 = '2008-04-08')}
1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col7, _col8
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean)
+ predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean)
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string)
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -983,88 +999,25 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: src
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
alias: b
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
+ predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1160,60 +1113,39 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
#### A masked pattern was here ####
- Partition
- base file name: hr=11
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 11
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- numFiles 1
- numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
- rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -1221,13 +1153,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1237,66 +1167,62 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
- /srcpart/ds=2008-04-09/hr=11 [b]
- /srcpart/ds=2008-04-09/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
- Left Outer Join0 to 1
+ Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6, _col7
- Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: (((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean)
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string:string:string:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1316,8 +1242,6 @@ PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: FROM
src a
@@ -1331,8 +1255,6 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
17 val_17 17 val_17
17 val_17 17 val_17
@@ -1467,16 +1389,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1583,16 +1509,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1649,16 +1579,16 @@ STAGE PLANS:
condition map:
Left Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col7, _col8
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean)
+ predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean)
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string)
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/merge1.q.out b/ql/src/test/results/clientpositive/spark/merge1.q.out
index 41acbdb..e594d47 100644
--- a/ql/src/test/results/clientpositive/spark/merge1.q.out
+++ b/ql/src/test/results/clientpositive/spark/merge1.q.out
@@ -42,11 +42,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/merge2.q.out b/ql/src/test/results/clientpositive/spark/merge2.q.out
index edeebb7..63274eb 100644
--- a/ql/src/test/results/clientpositive/spark/merge2.q.out
+++ b/ql/src/test/results/clientpositive/spark/merge2.q.out
@@ -42,11 +42,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index 0ccae50..18e1d4e 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -200,10 +200,10 @@ STAGE PLANS:
Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
- outputColumnNames: s, bo, bin, si, i, b
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b)
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
@@ -256,10 +256,10 @@ STAGE PLANS:
Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
- outputColumnNames: s, bo, bin, si, i, b
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b)
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
@@ -448,10 +448,10 @@ STAGE PLANS:
Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ts (type: timestamp)
- outputColumnNames: ts
+ outputColumnNames: _col0
Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(ts)
+ aggregations: count(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/order2.q.out b/ql/src/test/results/clientpositive/spark/order2.q.out
index 22b1ddc..29eb835 100644
--- a/ql/src/test/results/clientpositive/spark/order2.q.out
+++ b/ql/src/test/results/clientpositive/spark/order2.q.out
@@ -43,7 +43,7 @@ STAGE PLANS:
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col0 < 10) (type: boolean)
+ predicate: (UDFToDouble(_col0) < 10.0) (type: boolean)
Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out
index 227886b..0614cb9 100644
--- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out
@@ -120,25 +120,32 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string), _col2 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -146,11 +153,13 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -160,51 +169,29 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.srcpart
+ name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -245,12 +232,12 @@ STAGE PLANS:
name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
- hr 12
+ ds 2008-04-09
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -291,12 +278,12 @@ STAGE PLANS:
name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -335,14 +322,37 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ /srcpart/ds=2008-04-09/hr=11 [b]
+ /srcpart/ds=2008-04-09/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -350,13 +360,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -366,26 +374,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
- /srcpart/ds=2008-04-09/hr=11 [b]
- /srcpart/ds=2008-04-09/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
@@ -393,21 +401,21 @@ STAGE PLANS:
condition map:
Outer Join 0 to 1
filter mappings:
- 1 [0, 1]
+ 0 [1, 1]
filter predicates:
- 0
- 1 {(VALUE._col1 = '2008-04-08')}
+ 0 {(VALUE._col1 = '2008-04-08')}
+ 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((((_col5 > 15) and (_col5 < 25)) and (_col0 > 10)) and (_col0 < 20)) (type: boolean)
+ predicate: ((((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) and (UDFToDouble(_col3) > 10.0)) and (UDFToDouble(_col3) < 20.0)) (type: boolean)
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -596,80 +604,25 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: src
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
alias: b
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -765,60 +718,39 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
#### A masked pattern was here ####
- Partition
- base file name: hr=11
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 11
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- numFiles 1
- numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
- rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -826,13 +758,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -842,43 +772,43 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
- /srcpart/ds=2008-04-09/hr=11 [b]
- /srcpart/ds=2008-04-09/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
- Outer Join 0 to 1
+ Left Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6, _col7
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: (((((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) and (_col0 > 10)) and (_col0 < 20)) (type: boolean)
+ predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean)
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -921,8 +851,6 @@ PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: FROM
src a
@@ -936,8 +864,6 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
17 val_17 17 val_17
17 val_17 17 val_17
diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out
index 651c4b4..c3454ee 100644
--- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out
+++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out
@@ -120,25 +120,32 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string), _col2 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -146,11 +153,13 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -160,51 +169,29 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.srcpart
+ name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -245,12 +232,12 @@ STAGE PLANS:
name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
- hr 12
+ ds 2008-04-09
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -291,12 +278,12 @@ STAGE PLANS:
name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -335,14 +322,37 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ /srcpart/ds=2008-04-09/hr=11 [b]
+ /srcpart/ds=2008-04-09/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -350,13 +360,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -366,26 +374,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
- /srcpart/ds=2008-04-09/hr=11 [b]
- /srcpart/ds=2008-04-09/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
@@ -393,21 +401,21 @@ STAGE PLANS:
condition map:
Outer Join 0 to 1
filter mappings:
- 1 [0, 1]
+ 0 [1, 1]
filter predicates:
- 0
- 1 {(VALUE._col1 = '2008-04-08')}
+ 0 {(VALUE._col1 = '2008-04-08')}
+ 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean)
+ predicate: ((((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) and (UDFToDouble(_col0) > 15.0)) and (UDFToDouble(_col0) < 25.0)) (type: boolean)
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -596,80 +604,25 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: src
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
alias: b
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -765,60 +718,39 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
#### A masked pattern was here ####
- Partition
- base file name: hr=11
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 11
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- numFiles 1
- numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
- rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -826,13 +758,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -842,43 +772,43 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
- /srcpart/ds=2008-04-09/hr=11 [b]
- /srcpart/ds=2008-04-09/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
- Outer Join 0 to 1
+ Left Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6, _col7
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: (((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean)
+ predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean)
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -921,8 +851,6 @@ PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: FROM
src a
@@ -936,8 +864,6 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
17 val_17 17 val_17
17 val_17 17 val_17
diff --git a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
index 15171d9..47555b2 100644
--- a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
@@ -38,37 +38,45 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+ expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -104,7 +112,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT dest_j1.* FROM dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index 4c22f0b..efadb1d 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -4818,7 +4818,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key = 11) (type: boolean)
+ predicate: (UDFToDouble(key) = 11.0) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), hr (type: string)
@@ -5047,7 +5047,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key = 11) (type: boolean)
+ predicate: (UDFToDouble(key) = 11.0) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), ds (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out
index 78db996..31b25b3 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out
@@ -40,41 +40,56 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean)
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col0 < '400') (type: boolean)
- Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ predicate: (_col0 <> '4') (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean)
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col0 < '400') (type: boolean)
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col0 > '20') and (((_col1 < 'val_50') or (_col0 > '2')) and (_col0 < '400'))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -83,27 +98,31 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Filter Operator
+ predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean)
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -315,35 +334,34 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean)
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean)
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -352,13 +370,13 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean)
+ predicate: ((_col0 > '50') or (_col1 < '50')) (type: boolean)
Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
+ expressions: _col1 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/ppd_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_join.q.out
index 441fee1..b97431c 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_join.q.out
@@ -37,42 +37,57 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean)
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col0 < '400') (type: boolean)
- Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ predicate: (_col0 <> '4') (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean)
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col0 < '400') (type: boolean)
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ predicate: ((_col0 > '20') and (((_col1 < 'val_50') or (_col0 > '2')) and (_col0 < '400'))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -81,22 +96,26 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean)
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -577,17 +596,17 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean)
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
@@ -595,18 +614,17 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean)
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -615,13 +633,13 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean)
+ predicate: ((_col0 > '50') or (_col2 < '50')) (type: boolean)
Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
+ expressions: _col2 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out
index 0cbd718..ed536c2 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out
@@ -127,34 +127,35 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
- Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: src
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: string)
- auto parallelism: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -203,8 +204,8 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [src]
- Map 4
+ /src [a]
+ Map 3
Map Operator Tree:
TableScan
alias: a
@@ -214,13 +215,24 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Group By Operator
+ aggregations: min(_col0)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -271,51 +283,26 @@ STAGE PLANS:
Truncated Path -> Alias:
/src [a]
Reducer 2
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double)
- outputColumnNames: _col0, _col2, _col3, _col4
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean)
- Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: _col3 (type: double), _col4 (type: double)
- auto parallelism: false
- Reducer 3
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col8, _col9
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double)
+ expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -331,6 +318,35 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Reducer 4
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean)
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 21 Data size: 222 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 21 Data size: 222 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: false
Stage: Stage-0
Fetch Operator
@@ -501,33 +517,30 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
- Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: src
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: string)
+ tag: 0
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -577,8 +590,8 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [src]
- Map 4
+ /src [a]
+ Map 3
Map Operator Tree:
TableScan
alias: a
@@ -588,13 +601,24 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Group By Operator
+ aggregations: min(_col0)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -645,43 +669,18 @@ STAGE PLANS:
Truncated Path -> Alias:
/src [a]
Reducer 2
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double)
- outputColumnNames: _col0, _col2, _col3, _col4
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: (_col2 < 5.0) (type: boolean)
- Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: _col3 (type: double), _col4 (type: double)
- auto parallelism: false
- Reducer 3
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col8, _col9
+ outputColumnNames: _col0, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double)
+ expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -705,6 +704,31 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Reducer 4
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean)
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: false
Stage: Stage-0
Fetch Operator
@@ -875,33 +899,30 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
- Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: src
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -951,24 +972,31 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [src]
- Map 4
+ /src [a]
+ Map 3
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1019,43 +1047,18 @@ STAGE PLANS:
Truncated Path -> Alias:
/src [a]
Reducer 2
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double)
- outputColumnNames: _col0, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean)
- Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: _col3 (type: double), _col4 (type: double)
- auto parallelism: false
- Reducer 3
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col8, _col9
+ outputColumnNames: _col0, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double)
+ expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1079,6 +1082,35 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Reducer 4
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 42 Data size: 445 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 42 Data size: 445 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: false
Stage: Stage-0
Fetch Operator
@@ -1249,33 +1281,30 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
- Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: src
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: string)
+ tag: 0
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -1325,8 +1354,8 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [src]
- Map 4
+ /src [a]
+ Map 3
Map Operator Tree:
TableScan
alias: a
@@ -1336,13 +1365,24 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Group By Operator
+ aggregations: min(_col0)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1393,43 +1433,18 @@ STAGE PLANS:
Truncated Path -> Alias:
/src [a]
Reducer 2
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double)
- outputColumnNames: _col0, _col2, _col3, _col4
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: (_col2 < 5.0) (type: boolean)
- Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: _col3 (type: double), _col4 (type: double)
- auto parallelism: false
- Reducer 3
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 key (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col8, _col9
+ outputColumnNames: _col0, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double)
+ expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1453,6 +1468,31 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Reducer 4
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean)
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: false
Stage: Stage-0
Fetch Operator
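
Reviewer note: the hunks above are regenerated golden output, not hand edits. Two planner changes drive them: each scan now emits a column-pruning Select Operator (so plan columns become _colN before the shuffle, and the map-side Group By moved with it from Map 1 to Map 3), and the filter over the aggregate is applied as soon as the merge-partial Group By finishes, with the implicit string-to-double cast printed explicitly as UDFToDouble. A hedged reconstruction of the query shape behind these plans, assembled from the operators above rather than quoted from the .q file:

select a.key, b.k2, b.k3
from src a
join (select key,
             min(key) + 1 as k1,
             min(key) + 2 as k2,
             min(key) + 3 as k3
      from src
      group by key) b
on a.key = b.key
where b.k1 < 5;

-- key is a string and 5 is numeric, hence the explicit
-- ((UDFToDouble(_col1) + 1.0) < 5.0) predicate in the new Reducer 4.
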
diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
index 67827ed..987c653 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out
@@ -37,52 +37,56 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean)
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -151,52 +155,56 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > 10) and (key < 20)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean)
+ predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
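
In ppd_outer_join1.q.out the join itself stays a left outer join; what moves is the work around it. Both scans now project key/value into _col0/_col1 before the shuffle, the preserved-side range is applied entirely at the scans, and the reducer's residual filter shrinks to just the b-side range, so the trailing Select collapses into the File Output Operator. For orientation, a sketch of the query under test, from memory of the ppd_outer_join1.q file rather than from this patch:

from src a left outer join src b on (a.key = b.key)
select a.key, a.value, b.key, b.value
where a.key > 10 and a.key < 20 and b.key > 15 and b.key < 25;

-- b is the null-supplying side, so its range cannot be finished at the scan
-- without changing results; it survives as the post-join filter
-- ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)).
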
diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out
index 5d6a469..eaafd7e 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out
@@ -37,52 +37,65 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '15') and (key < '25')) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '15') and (_col0 < '25')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '15') and (key < '25')) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean)
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -271,52 +284,53 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '15') and (key < '25')) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '15') and (key < '25')) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean)
- Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
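
ppd_outer_join2.q.out shows the bigger rewrite: because the WHERE clause constrains the null-supplying side, no null-extended row can survive, so Right Outer Join0 to 1 becomes Inner Join 0 to 1. That conversion in turn legalizes pushing the merged ranges plus key is not null into both TableScan filters, which is why the pre-shuffle estimates drop from 55 rows to 3. A minimal sketch of the rule (hedged; the literals are strings here, so no UDFToDouble casts appear in these plans):

-- a is null-supplying under a right outer join; filtering a.key in WHERE
-- discards every null-extended row, so the join may be planned as inner
select a.key, a.value, b.key, b.value
from src a right outer join src b on a.key = b.key
where a.key > '10' and a.key < '20'
  and b.key > '15' and b.key < '25';
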
diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out
index 6a0654a..2cb969f 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out
@@ -36,47 +36,66 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Filter Operator
+ predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '15') and (_col0 < '25')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Filter Operator
+ predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Outer Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean)
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -264,47 +283,54 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Filter Operator
+ predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Filter Operator
+ predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Outer Join 0 to 1
+ Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean)
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
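
ppd_outer_join3.q.out is the full outer variant and makes the old cost visible: the previous plans had no scan-side Filter Operator at all, since a full outer join preserves both sides and blocks pushdown, so all 500 rows were shuffled from each scan and filtered only after the join. With both sides constrained in WHERE, the join now degenerates to inner and the merged predicates land in both scans. A hedged sketch of the shape, with the bounds read off the old reducer filter above:

select a.key, a.value, b.key, b.value
from src a full outer join src b on a.key = b.key
where a.key > '10' and a.key < '20'
  and b.key > '15' and b.key < '25';
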
diff --git a/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out b/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out
index 2fb148a..02e24c3 100644
--- a/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out
@@ -97,11 +97,11 @@ STAGE PLANS:
Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string)
- outputColumnNames: l_shipdate
+ outputColumnNames: _col0
Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(l_shipdate)
- keys: l_shipdate (type: string)
+ aggregations: count(_col0)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
@@ -264,15 +264,15 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__
+ alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
- outputColumnNames: l_shipdate, _count_of_l_shipdate
+ outputColumnNames: _col0, _count_of_l_shipdate
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
- keys: l_shipdate (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
@@ -447,12 +447,12 @@ STAGE PLANS:
alias: lineitem_ix
Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_shipdate (type: string)
- outputColumnNames: l_shipdate
+ expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(l_shipdate)
- keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
+ aggregations: count(_col2)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
@@ -585,15 +585,15 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__
+ alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
- outputColumnNames: l_shipdate, _count_of_l_shipdate
+ expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _count_of_l_shipdate
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
- keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
@@ -912,15 +912,15 @@ STAGE PLANS:
Map 2
Map Operator Tree:
TableScan
- alias: null-subquery1:dummy-subquery1:default__lineitem_ix_lineitem_ix_lshipdate_idx__
+ alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
- outputColumnNames: l_shipdate, _count_of_l_shipdate
+ outputColumnNames: _col0, _count_of_l_shipdate
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
- keys: l_shipdate (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
@@ -1004,11 +1004,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: 1 (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: count(key)
- keys: key (type: int)
+ aggregations: count(_col0)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1058,15 +1058,15 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: default__tbl_tbl_key_idx__
+ alias: $hdt$_0:default__tbl_tbl_key_idx__
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), _count_of_key (type: bigint)
- outputColumnNames: key, _count_of_key
+ outputColumnNames: _col0, _count_of_key
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: sum(_count_of_key)
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1168,11 +1168,11 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: default__tbl_tbl_key_idx__
+ alias: $hdt$_0:default__tbl_tbl_key_idx__
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), _count_of_key (type: bigint)
- outputColumnNames: key, _count_of_key
+ outputColumnNames: _col0, _count_of_key
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: sum(_count_of_key)
@@ -1226,10 +1226,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1281,10 +1281,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: int), key (type: int)
- outputColumnNames: value, key
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: value (type: int), key (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1343,10 +1343,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: 3 (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1401,10 +1401,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1455,11 +1455,11 @@ STAGE PLANS:
alias: tbl
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: key (type: int)
- outputColumnNames: key
+ expressions: key (type: int), substr(key, 2, 3) (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int), substr(key, 2, 3) (type: string)
+ keys: _col0 (type: int), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1515,10 +1515,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: value (type: int), key (type: int)
- outputColumnNames: value, key
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: value (type: int), key (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1577,10 +1577,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), 1 (type: int)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int), value (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1632,10 +1632,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1687,10 +1687,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1742,10 +1742,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1797,10 +1797,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), value (type: int)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int), value (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1855,10 +1855,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), 2 (type: int)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int), value (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1913,10 +1913,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: 3 (type: int), 2 (type: int)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int), value (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1969,16 +1969,20 @@ STAGE PLANS:
Filter Operator
predicate: (value = key) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- keys: key (type: int), value (type: int)
- mode: hash
+ Select Operator
+ expressions: key (type: int), value (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2023,16 +2027,20 @@ STAGE PLANS:
Filter Operator
predicate: (value = key) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- keys: key (type: int), substr(value, 2, 3) (type: string)
- mode: hash
+ Select Operator
+ expressions: key (type: int), substr(value, 2, 3) (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2075,11 +2083,11 @@ STAGE PLANS:
alias: tbl
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: int)
- outputColumnNames: key, value
+ expressions: key (type: int), substr(value, 2, 3) (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int), substr(value, 2, 3) (type: string)
+ keys: _col0 (type: int), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -2134,10 +2142,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), 2 (type: int)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- keys: key (type: int), value (type: int)
+ keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -2153,17 +2161,13 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), 2 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -2279,18 +2283,22 @@ STAGE PLANS:
Filter Operator
predicate: (key < 10) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(key)
- keys: key (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2373,18 +2381,18 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: default__tblpart_tbl_part_index__
+ alias: $hdt$_0:default__tblpart_tbl_part_index__
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key < 10) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), _count_of_key (type: bigint)
- outputColumnNames: key, _count_of_key
+ outputColumnNames: _col0, _count_of_key
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_key)
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -2488,11 +2496,11 @@ STAGE PLANS:
Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(key)
- keys: key (type: int)
+ aggregations: count(_col0)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE
@@ -2568,15 +2576,15 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: default__tbl_tbl_key_idx__
+ alias: $hdt$_0:default__tbl_tbl_key_idx__
Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), _count_of_key (type: bigint)
- outputColumnNames: key, _count_of_key
+ outputColumnNames: _col0, _count_of_key
Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_key)
- keys: key (type: int)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
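
The ql_rewrite_gbtoidx_cbo_1.q.out changes are cosmetic by comparison: the new Select Operators rename plan columns from key/l_shipdate to _col0, and subquery aliases gain the $hdt$_0: prefix, but the group-by-to-index rewrite itself is untouched. For orientation, the pattern the test exercises, as a hedged sketch with identifiers taken from the plans above rather than from the .q file:

-- the rewrite answers this from the aggregate index table
-- default__lineitem_ix_lineitem_ix_lshipdate_idx__, turning count(l_shipdate)
-- over the base table into sum(_count_of_l_shipdate) over precomputed counts
select l_shipdate, count(l_shipdate)
from lineitem_ix
group by l_shipdate;
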
diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
index 36af99a..12f1abb 100644
--- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
@@ -118,29 +118,36 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string), _col2 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -148,11 +155,13 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -162,55 +171,29 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.srcpart
+ name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -251,12 +234,12 @@ STAGE PLANS:
name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
- hr 12
+ ds 2008-04-09
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -297,12 +280,12 @@ STAGE PLANS:
name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -341,14 +324,41 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ /srcpart/ds=2008-04-09/hr=11 [b]
+ /srcpart/ds=2008-04-09/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -356,13 +366,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -372,48 +380,48 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
- /srcpart/ds=2008-04-09/hr=11 [b]
- /srcpart/ds=2008-04-09/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Left Outer Join0 to 1
filter mappings:
- 1 [0, 1]
+ 0 [1, 1]
filter predicates:
- 0
- 1 {(VALUE._col1 = '2008-04-08')}
+ 0 {(VALUE._col1 = '2008-04-08')}
+ 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
+ predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean)
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -607,16 +615,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -723,16 +735,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -789,16 +805,16 @@ STAGE PLANS:
condition map:
Right Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col7, _col8
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
+ predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string)
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -983,29 +999,36 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -1013,11 +1036,13 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1027,55 +1052,29 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
- Truncated Path -> Alias:
- /src [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.srcpart
+ name: default.srcpart
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
- hr 11
+ hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -1114,14 +1113,39 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [b]
+ /srcpart/ds=2008-04-08/hr=12 [b]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=12
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 12
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -1129,13 +1153,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1145,41 +1167,43 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [b]
- /srcpart/ds=2008-04-08/hr=12 [b]
+ /src [a]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Left Outer Join0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col5, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
+ predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean)
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1365,20 +1389,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string), ds (type: string)
- auto parallelism: false
+ predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1474,103 +1502,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
-#### A masked pattern was here ####
- Partition
- base file name: hr=11
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 11
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- numFiles 1
- numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
- rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
-#### A masked pattern was here ####
- Partition
- base file name: hr=12
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- hr 12
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- numFiles 1
- numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
- rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value
- columns.comments 'default','default'
- columns.types string:string
-#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [a]
/srcpart/ds=2008-04-08/hr=12 [a]
- /srcpart/ds=2008-04-09/hr=11 [a]
- /srcpart/ds=2008-04-09/hr=12 [a]
Map 3
Map Operator Tree:
TableScan
@@ -1579,16 +1513,20 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((key > 15) and (key < 25)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1643,41 +1581,37 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Inner Join 0 to 1
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col1, _col2, _col7, _col8
- Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- isSamplingPred: false
- predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean)
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string:string:string:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1697,8 +1631,6 @@ PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: FROM
srcpart a
@@ -1712,8 +1644,6 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
17 val_17 17 val_17
17 val_17 17 val_17
diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out
index c9c09e8..1f6aac2 100644
--- a/ql/src/test/results/clientpositive/spark/semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out
@@ -2516,14 +2516,18 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (value is not null and (key > 100)) (type: boolean)
- Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: value (type: string)
- sort order: +
- Map-reduce partition columns: value (type: string)
- Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: int)
+ predicate: ((key > 100) and value is not null) (type: boolean)
+ Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Map 3
Map Operator Tree:
TableScan
@@ -2534,10 +2538,10 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string)
- outputColumnNames: value
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: value (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
@@ -2552,7 +2556,7 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 value (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin.q.out b/ql/src/test/results/clientpositive/spark/skewjoin.q.out
index 5fb2ecb..ec74786 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin.q.out
@@ -105,25 +105,33 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -131,12 +139,12 @@ STAGE PLANS:
Inner Join 0 to 1
handleSkewJoin: true
keys:
- 0 key (type: string)
- 1 key (type: string)
- outputColumnNames: _col0, _col6
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+ expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -178,9 +186,9 @@ STAGE PLANS:
keys:
0 reducesinkkey0 (type: string)
1 reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col6
+ outputColumnNames: _col1, _col2
Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+ expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -218,7 +226,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT sum(hash(key)), sum(hash(value)) FROM dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
@@ -625,14 +633,15 @@ STAGE PLANS:
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 4
Map Operator Tree:
TableScan
@@ -642,15 +651,14 @@ STAGE PLANS:
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -660,19 +668,23 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col2, _col3
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col2)), sum(hash(_col3))
- mode: hash
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-4
Conditional Operator
@@ -704,18 +716,22 @@ STAGE PLANS:
keys:
0 reducesinkkey0 (type: string)
1 reducesinkkey0 (type: string)
- outputColumnNames: _col2, _col3
- Group By Operator
- aggregations: sum(hash(_col2)), sum(hash(_col3))
- mode: hash
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -824,16 +840,16 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean)
+ predicate: (key is not null and (UDFToDouble(substring(value, 5)) + 1.0) is not null) (type: boolean)
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), (substring(_col1, 5) + 1) (type: double)
+ key expressions: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double)
sort order: ++
- Map-reduce partition columns: _col0 (type: string), (substring(_col1, 5) + 1) (type: double)
+ Map-reduce partition columns: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double)
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
@@ -844,20 +860,24 @@ STAGE PLANS:
handleSkewJoin: true
keys:
0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double)
- 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double)
+ 1 _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double)
outputColumnNames: _col2, _col3
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col2)), sum(hash(_col3))
- mode: hash
+ Select Operator
+ expressions: hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-4
Conditional Operator
@@ -890,17 +910,21 @@ STAGE PLANS:
0 reducesinkkey0 (type: string), reducesinkkey1 (type: double)
1 reducesinkkey0 (type: string), reducesinkkey1 (type: double)
outputColumnNames: _col2, _col3
- Group By Operator
- aggregations: sum(hash(_col2)), sum(hash(_col3))
- mode: hash
+ Select Operator
+ expressions: hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out b/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
index 8afc656..4025885 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
@@ -29,25 +29,33 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 4
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -55,8 +63,8 @@ STAGE PLANS:
Inner Join 0 to 1
handleSkewJoin: true
keys:
- 0 key (type: string)
- 1 key (type: string)
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/stats1.q.out b/ql/src/test/results/clientpositive/spark/stats1.q.out
index f00db10..ec2edc4 100644
--- a/ql/src/test/results/clientpositive/spark/stats1.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats1.q.out
@@ -121,7 +121,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Output: default@tmptable
POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src1)s2.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key, x.value
PREHOOK: type: QUERY
PREHOOK: Input: default@tmptable
diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
index 2840729..cceceef 100644
--- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
@@ -90,10 +90,10 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: double), b (type: int), c (type: string), d (type: smallint)
- outputColumnNames: a, b, c, d
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(), count(a), count(b), count(c), count(d)
+ aggregations: count(), count(_col0), count(_col1), count(_col2), count(_col3)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
@@ -146,10 +146,10 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: double), b (type: int), c (type: string), d (type: smallint)
- outputColumnNames: a, b, c, d
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(), count(a), count(b), count(c), count(d)
+ aggregations: count(), count(_col0), count(_col1), count(_col2), count(_col3)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index 854ca14..28eda26 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -43,25 +43,29 @@ STAGE PLANS:
Filter Operator
predicate: (value is not null and key is not null) (type: boolean)
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: value (type: string), key (type: string)
- sort order: ++
- Map-reduce partition columns: value (type: string), key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((value > 'val_9') and key is not null) (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: value (type: string), key (type: string)
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col1 (type: string), _col2 (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
@@ -76,7 +80,7 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 value (type: string), key (type: string)
+ 0 _col1 (type: string), _col0 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 6cf1ae4..00b3399 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -33,16 +33,20 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: s1
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key > '9') (type: boolean)
@@ -67,7 +71,7 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 key (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
@@ -147,15 +151,19 @@ STAGE PLANS:
Filter Operator
predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((key > '9') and value is not null) (type: boolean)
@@ -180,7 +188,7 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 key (type: string), value (type: string)
+ 0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
@@ -267,15 +275,19 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: UDFToDouble(p_size) is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: UDFToDouble(p_size) (type: double)
- sort order: +
- Map-reduce partition columns: UDFToDouble(p_size) (type: double)
+ Select Operator
+ expressions: p_name (type: string), p_size (type: int), UDFToDouble(p_size) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_name (type: string), p_size (type: int)
+ Reduce Output Operator
+ key expressions: _col2 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: double)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: int)
Map 3
Map Operator Tree:
TableScan
@@ -292,21 +304,17 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 UDFToDouble(p_size) (type: double)
+ 0 _col2 (type: double)
1 _col0 (type: double)
- outputColumnNames: _col1, _col5
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col5 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
Reduce Operator Tree:
Select Operator
@@ -438,16 +446,20 @@ STAGE PLANS:
Filter Operator
predicate: (p_size is not null and p_mfgr is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: p_size (type: int), p_mfgr (type: string)
- sort order: ++
- Map-reduce partition columns: p_size (type: int), p_mfgr (type: string)
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_name (type: string)
+ Reduce Output Operator
+ key expressions: _col2 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: int), _col1 (type: string)
+ Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Map 3
Map Operator Tree:
TableScan
- alias: part
+ alias: b
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_mfgr (type: string), p_size (type: int)
@@ -460,12 +472,12 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 p_size (type: int), p_mfgr (type: string)
+ 0 _col2 (type: int), _col1 (type: string)
1 _col0 (type: int), _col1 (type: string)
- outputColumnNames: _col1, _col2, _col5
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int)
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -617,36 +629,44 @@ STAGE PLANS:
Filter Operator
predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((key > '9') and value is not null) (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: key (type: string), value (type: string)
- mode: hash
+ Select Operator
+ expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
Left Semi Join 0 to 1
keys:
- 0 key (type: string), value (type: string)
+ 0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
@@ -756,9 +776,9 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
- Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
- Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 6 <- Map 5 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -767,33 +787,19 @@ STAGE PLANS:
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: l_partkey is not null (type: boolean)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
- alias: li
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean)
- Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: l_partkey (type: int)
- sort order: +
- Map-reduce partition columns: l_partkey (type: int)
- Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE
- value expressions: l_orderkey (type: int), l_suppkey (type: int)
- Map 6
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int)
+ Map 4
Map Operator Tree:
TableScan
alias: lineitem
@@ -815,46 +821,56 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_partkey (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Reduce Operator Tree:
Join Operator
condition map:
- Inner Join 0 to 1
+ Left Semi Join 0 to 1
keys:
0 _col0 (type: int)
- 1 l_partkey (type: int)
- outputColumnNames: _col0, _col1, _col3
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col3 (type: int)
- Reducer 4
+ value expressions: _col2 (type: int)
+ Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col0, _col3
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col3 (type: int)
+ expressions: _col1 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -864,6 +880,18 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
index 92a8595..1bfdba2 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
@@ -237,19 +237,19 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 0 (type: bigint)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: bigint)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
Move Operator
@@ -580,16 +580,16 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 0 (type: bigint)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: bigint)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0
diff --git a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out
index 3660f1a..7576b48 100644
--- a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out
+++ b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out
@@ -22,7 +22,7 @@ SELECT key, count(1) FROM T1 GROUP BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_2
+Operator:GBY_3
Table:default@t1
Keys:key
@@ -35,7 +35,7 @@ PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_2
+Operator:GBY_3
Table:default@t1
Keys:key,val
@@ -77,7 +77,7 @@ SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_2
+Operator:GBY_3
Table:default@t1
Keys:key
@@ -90,7 +90,7 @@ PREHOOK: query: SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_2
+Operator:GBY_3
Table:default@t1
Keys:key,val
@@ -104,7 +104,7 @@ PREHOOK: query: SELECT key, 1, val, 2, count(1) FROM T1 GROUP BY key, 1, val, 2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_2
+Operator:GBY_3
Table:default@t1
Keys:key,val
@@ -130,7 +130,7 @@ group by key + key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_2
+Operator:GBY_3
Table:default@t1
Keys:key
@@ -148,11 +148,11 @@ SELECT key, count(1) as c FROM T1 GROUP BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_2
+Operator:GBY_3
Table:default@t1
Keys:key
-Operator:GBY_8
+Operator:GBY_10
Table:default@t1
Keys:key
@@ -242,7 +242,7 @@ GROUP BY key, constant3, val
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
-Operator:GBY_4
+Operator:GBY_3
Table:default@t1
Keys:key,val
diff --git a/ql/src/test/results/clientpositive/spark/temp_table.q.out b/ql/src/test/results/clientpositive/spark/temp_table.q.out
index 0801abc..65e256d 100644
--- a/ql/src/test/results/clientpositive/spark/temp_table.q.out
+++ b/ql/src/test/results/clientpositive/spark/temp_table.q.out
@@ -19,7 +19,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 2) = 0) (type: boolean)
+ predicate: ((UDFToDouble(key) % 2.0) = 0.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -85,7 +85,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 2) = 1) (type: boolean)
+ predicate: ((UDFToDouble(key) % 2.0) = 1.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/union.q.out b/ql/src/test/results/clientpositive/spark/union.q.out
index 2644473..a78504f 100644
--- a/ql/src/test/results/clientpositive/spark/union.q.out
+++ b/ql/src/test/results/clientpositive/spark/union.q.out
@@ -35,7 +35,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 100) (type: boolean)
+ predicate: (UDFToDouble(key) < 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -54,7 +54,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key > 100) (type: boolean)
+ predicate: (UDFToDouble(key) > 100.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/union10.q.out b/ql/src/test/results/clientpositive/spark/union10.q.out
index 9698dda..5e8fe38 100644
--- a/ql/src/test/results/clientpositive/spark/union10.q.out
+++ b/ql/src/test/results/clientpositive/spark/union10.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -79,7 +79,7 @@ STAGE PLANS:
Map 5
Map Operator Tree:
TableScan
- alias: s3
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -194,7 +194,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@tmptable
POSTHOOK: Lineage: tmptable.key EXPRESSION []
-POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, (src)s3.null, ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION []
PREHOOK: query: select * from tmptable x sort by x.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tmptable
diff --git a/ql/src/test/results/clientpositive/spark/union11.q.out b/ql/src/test/results/clientpositive/spark/union11.q.out
index 300bad4..20c27c7 100644
--- a/ql/src/test/results/clientpositive/spark/union11.q.out
+++ b/ql/src/test/results/clientpositive/spark/union11.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
Map 4
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -67,7 +67,7 @@ STAGE PLANS:
Map 6
Map Operator Tree:
TableScan
- alias: s3
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -91,22 +91,18 @@ STAGE PLANS:
expressions: 'tst1' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -133,22 +129,18 @@ STAGE PLANS:
expressions: 'tst2' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reducer 7
Reduce Operator Tree:
Group By Operator
@@ -160,22 +152,18 @@ STAGE PLANS:
expressions: 'tst3' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/union12.q.out b/ql/src/test/results/clientpositive/spark/union12.q.out
index b9ddfc8..cf2c7b7 100644
--- a/ql/src/test/results/clientpositive/spark/union12.q.out
+++ b/ql/src/test/results/clientpositive/spark/union12.q.out
@@ -198,7 +198,7 @@ POSTHOOK: Input: default@src1
POSTHOOK: Input: default@srcbucket
POSTHOOK: Output: default@tmptable
POSTHOOK: Lineage: tmptable.key EXPRESSION []
-POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.null, (srcbucket)s3.null, ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION []
PREHOOK: query: select * from tmptable x sort by x.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tmptable
diff --git a/ql/src/test/results/clientpositive/spark/union13.q.out b/ql/src/test/results/clientpositive/spark/union13.q.out
index fca251c..0297988 100644
--- a/ql/src/test/results/clientpositive/spark/union13.q.out
+++ b/ql/src/test/results/clientpositive/spark/union13.q.out
@@ -40,7 +40,7 @@ STAGE PLANS:
Map 2
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/union14.q.out b/ql/src/test/results/clientpositive/spark/union14.q.out
index 77051df..0c9542b 100644
--- a/ql/src/test/results/clientpositive/spark/union14.q.out
+++ b/ql/src/test/results/clientpositive/spark/union14.q.out
@@ -37,22 +37,18 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Map 3
Map Operator Tree:
TableScan
@@ -95,22 +91,18 @@ STAGE PLANS:
expressions: 'tst1' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/union15.q.out b/ql/src/test/results/clientpositive/spark/union15.q.out
index c51dd74..6be13c9 100644
--- a/ql/src/test/results/clientpositive/spark/union15.q.out
+++ b/ql/src/test/results/clientpositive/spark/union15.q.out
@@ -55,47 +55,39 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Map 5
Map Operator Tree:
TableScan
- alias: s3
+ alias: s2
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -107,22 +99,18 @@ STAGE PLANS:
expressions: 'tst1' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/union2.q.out b/ql/src/test/results/clientpositive/spark/union2.q.out
index 881ce21..e4afb1b 100644
--- a/ql/src/test/results/clientpositive/spark/union2.q.out
+++ b/ql/src/test/results/clientpositive/spark/union2.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/spark/union20.q.out b/ql/src/test/results/clientpositive/spark/union20.q.out
index 546355f..6f0dca6 100644
--- a/ql/src/test/results/clientpositive/spark/union20.q.out
+++ b/ql/src/test/results/clientpositive/spark/union20.q.out
@@ -58,10 +58,10 @@ STAGE PLANS:
Map 4
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 10) (type: boolean)
+ predicate: (UDFToDouble(key) < 10.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -76,7 +76,7 @@ STAGE PLANS:
Map 5
Map Operator Tree:
TableScan
- alias: s3
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -92,10 +92,10 @@ STAGE PLANS:
Map 7
Map Operator Tree:
TableScan
- alias: s4
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key < 10) (type: boolean)
+ predicate: (UDFToDouble(key) < 10.0) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/union24.q.out b/ql/src/test/results/clientpositive/spark/union24.q.out
index 7a05198..658ff9e 100644
--- a/ql/src/test/results/clientpositive/spark/union24.q.out
+++ b/ql/src/test/results/clientpositive/spark/union24.q.out
@@ -198,7 +198,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key < 10) (type: boolean)
+ predicate: (UDFToDouble(key) < 10.0) (type: boolean)
Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), count (type: bigint)
@@ -282,7 +282,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key < 10) (type: boolean)
+ predicate: (UDFToDouble(key) < 10.0) (type: boolean)
Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), count (type: bigint)
@@ -366,7 +366,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key < 10) (type: boolean)
+ predicate: (UDFToDouble(key) < 10.0) (type: boolean)
Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), count (type: bigint)
@@ -450,22 +450,26 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key < 10) (type: boolean)
+ predicate: (UDFToDouble(key) < 10.0) (type: boolean)
Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
diff --git a/ql/src/test/results/clientpositive/spark/union25.q.out b/ql/src/test/results/clientpositive/spark/union25.q.out
index db7dfc5..5193c06 100644
--- a/ql/src/test/results/clientpositive/spark/union25.q.out
+++ b/ql/src/test/results/clientpositive/spark/union25.q.out
@@ -100,43 +100,35 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Map 5
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/union28.q.out b/ql/src/test/results/clientpositive/spark/union28.q.out
index 5160879..98582df 100644
--- a/ql/src/test/results/clientpositive/spark/union28.q.out
+++ b/ql/src/test/results/clientpositive/spark/union28.q.out
@@ -73,20 +73,18 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: key (type: string), value (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
Map 4
Map Operator Tree:
TableScan
@@ -94,76 +92,56 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: key (type: string), value (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_subq_union
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_subq_union
Reducer 5
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_subq_union
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_subq_union
Stage: Stage-0
Move Operator
diff --git a/ql/src/test/results/clientpositive/spark/union30.q.out b/ql/src/test/results/clientpositive/spark/union30.q.out
index a81417d..3409623 100644
--- a/ql/src/test/results/clientpositive/spark/union30.q.out
+++ b/ql/src/test/results/clientpositive/spark/union30.q.out
@@ -87,20 +87,18 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: key (type: string), value (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
Map 4
Map Operator Tree:
TableScan
@@ -108,20 +106,18 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: key (type: string), value (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
Map 6
Map Operator Tree:
TableScan
@@ -146,59 +142,41 @@ STAGE PLANS:
Reducer 3
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_subq_union
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_subq_union
Reducer 5
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_subq_union
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_subq_union
Stage: Stage-0
Move Operator
diff --git a/ql/src/test/results/clientpositive/spark/union33.q.out b/ql/src/test/results/clientpositive/spark/union33.q.out
index 680a627..0e6b1aa 100644
--- a/ql/src/test/results/clientpositive/spark/union33.q.out
+++ b/ql/src/test/results/clientpositive/spark/union33.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 0) (type: boolean)
+ predicate: (UDFToDouble(key) = 0.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: '0' (type: string), value (type: string)
@@ -72,11 +72,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -205,11 +205,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -225,7 +225,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 0) (type: boolean)
+ predicate: (UDFToDouble(key) = 0.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: '0' (type: string), value (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/union4.q.out b/ql/src/test/results/clientpositive/spark/union4.q.out
index cdda727..c121ef0 100644
--- a/ql/src/test/results/clientpositive/spark/union4.q.out
+++ b/ql/src/test/results/clientpositive/spark/union4.q.out
@@ -60,7 +60,7 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -148,7 +148,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@tmptable
POSTHOOK: Lineage: tmptable.key EXPRESSION []
-POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION []
PREHOOK: query: select * from tmptable x sort by x.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tmptable
diff --git a/ql/src/test/results/clientpositive/spark/union5.q.out b/ql/src/test/results/clientpositive/spark/union5.q.out
index a65c4e8..afee988 100644
--- a/ql/src/test/results/clientpositive/spark/union5.q.out
+++ b/ql/src/test/results/clientpositive/spark/union5.q.out
@@ -46,7 +46,7 @@ STAGE PLANS:
Map 4
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -70,22 +70,18 @@ STAGE PLANS:
expressions: 'tst1' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -112,22 +108,18 @@ STAGE PLANS:
expressions: 'tst2' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/union6.q.out b/ql/src/test/results/clientpositive/spark/union6.q.out
index 8146e12..6176beb 100644
--- a/ql/src/test/results/clientpositive/spark/union6.q.out
+++ b/ql/src/test/results/clientpositive/spark/union6.q.out
@@ -121,7 +121,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Output: default@tmptable
POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src1)s2.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from tmptable x sort by x.key, x.value
PREHOOK: type: QUERY
PREHOOK: Input: default@tmptable
diff --git a/ql/src/test/results/clientpositive/spark/union7.q.out b/ql/src/test/results/clientpositive/spark/union7.q.out
index 7427959..4a81283 100644
--- a/ql/src/test/results/clientpositive/spark/union7.q.out
+++ b/ql/src/test/results/clientpositive/spark/union7.q.out
@@ -51,22 +51,18 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -78,22 +74,18 @@ STAGE PLANS:
expressions: 'tst1' (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/union8.q.out b/ql/src/test/results/clientpositive/spark/union8.q.out
index d7211ed..03545f5 100644
--- a/ql/src/test/results/clientpositive/spark/union8.q.out
+++ b/ql/src/test/results/clientpositive/spark/union8.q.out
@@ -42,7 +42,7 @@ STAGE PLANS:
Map 2
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -58,7 +58,7 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: s3
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/union9.q.out b/ql/src/test/results/clientpositive/spark/union9.q.out
index 804ca1e..d420ef1 100644
--- a/ql/src/test/results/clientpositive/spark/union9.q.out
+++ b/ql/src/test/results/clientpositive/spark/union9.q.out
@@ -46,7 +46,7 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
@@ -64,7 +64,7 @@ STAGE PLANS:
Map 4
Map Operator Tree:
TableScan
- alias: s3
+ alias: s1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
index bf0fc20..ba0e293 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
@@ -79,11 +79,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -100,11 +100,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
index a9d4b51..2718775 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
@@ -98,7 +98,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -116,11 +116,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -136,7 +136,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
index 58abd21..26cfbab 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
@@ -85,11 +85,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -106,11 +106,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
index 356e79a..7a7aaf2 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
@@ -88,11 +88,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -109,11 +109,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out
index 2c01a5c..a5e15c5 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out
@@ -83,11 +83,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), ds (type: string)
- outputColumnNames: key, ds
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), ds (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -104,11 +104,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), ds (type: string)
- outputColumnNames: key, ds
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string), ds (type: string)
+ keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out
index d1e2312..ad44400 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out
@@ -83,11 +83,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -104,11 +104,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -273,49 +273,49 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 7) (type: boolean)
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ predicate: (UDFToDouble(key) = 7.0) (type: boolean)
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: '7' (type: string)
- outputColumnNames: key
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Map 3
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 7) (type: boolean)
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ predicate: (UDFToDouble(key) = 7.0) (type: boolean)
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: '7' (type: string)
- outputColumnNames: key
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
@@ -324,23 +324,15 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '7' (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
Reducer 4
Reduce Operator Tree:
Group By Operator
@@ -348,23 +340,15 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '7' (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
Stage: Stage-0
Move Operator
@@ -451,43 +435,49 @@ STAGE PLANS:
TableScan
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ Filter Operator
+ predicate: ((UDFToDouble(key) + UDFToDouble(key)) >= 7.0) (type: boolean)
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Map 3
Map Operator Tree:
TableScan
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ Filter Operator
+ predicate: ((UDFToDouble(key) + UDFToDouble(key)) >= 7.0) (type: boolean)
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -497,20 +487,17 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 >= 7.0) (type: boolean)
- Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
Reducer 4
Reduce Operator Tree:
Group By Operator
@@ -520,20 +507,17 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 >= 7.0) (type: boolean)
- Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
Stage: Stage-0
Move Operator
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
index 59d88cb..26c4eff 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
@@ -84,11 +84,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -104,7 +104,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -121,7 +121,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
index f57353f..1d67177 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
@@ -81,11 +81,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -102,11 +102,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
index 48867fb..9f5b070 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
@@ -81,20 +81,18 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Map 3
Map Operator Tree:
TableScan
@@ -102,68 +100,48 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
Reducer 4
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
Stage: Stage-0
Move Operator
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_22.q.out b/ql/src/test/results/clientpositive/spark/union_remove_22.q.out
index c41e12f..2e01432 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_22.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_22.q.out
@@ -284,11 +284,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -305,11 +305,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
index e989ed2..2659798 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
@@ -77,11 +77,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -98,11 +98,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
index a5a0126..0a94684 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
@@ -95,11 +95,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -116,11 +116,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
index 7d94d1d..6c3d596 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
@@ -84,11 +84,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -105,11 +105,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
index b31b3e6..55baa8b 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
@@ -91,11 +91,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -111,7 +111,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -128,7 +128,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out
index 988973f..c981ae4 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out
@@ -432,11 +432,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -453,11 +453,11 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
index 2a5eccf..084fbd6 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
@@ -83,11 +83,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -104,11 +104,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
index 7aee2af..f580bd8 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
@@ -88,11 +88,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
@@ -108,7 +108,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -125,7 +125,7 @@ STAGE PLANS:
alias: inputtbl1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
index 87ceca2..0931d11 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
@@ -95,11 +95,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/union_top_level.q.out b/ql/src/test/results/clientpositive/spark/union_top_level.q.out
index 5136989..dede1ef 100644
--- a/ql/src/test/results/clientpositive/spark/union_top_level.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_top_level.q.out
@@ -37,7 +37,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 0) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 0 (type: int)
@@ -56,7 +56,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 1) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 1 (type: int)
@@ -75,7 +75,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 2) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 2 (type: int)
@@ -414,7 +414,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 0) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 0 (type: int)
@@ -433,7 +433,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 1) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 1 (type: int)
@@ -452,7 +452,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 2) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 2 (type: int)
@@ -616,7 +616,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 0) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 0 (type: int)
@@ -635,7 +635,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 1) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 1 (type: int)
@@ -654,7 +654,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 2) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 2 (type: int)
@@ -805,7 +805,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 0) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 0 (type: int)
@@ -824,7 +824,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 1) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 1 (type: int)
@@ -843,7 +843,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key % 3) = 2) (type: boolean)
+ predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), 2 (type: int)
diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out
index aaac8aa..9e1402b 100644
--- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out
@@ -133,11 +133,11 @@ STAGE PLANS:
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i (type: int)
- outputColumnNames: i
+ outputColumnNames: _col0
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(50), avg(UDFToDouble(50)), avg(CAST( 50 AS decimal(10,0)))
- keys: i (type: int)
+ aggregations: avg(50), avg(50.0), avg(50)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out
index 44ecd09..69f4754 100644
--- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out
+++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out
@@ -133,11 +133,11 @@ STAGE PLANS:
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i (type: int)
- outputColumnNames: i
+ outputColumnNames: _col0
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(50), avg(UDFToDouble(50)), avg(CAST( 50 AS decimal(10,0)))
- keys: i (type: int)
+ aggregations: avg(50), avg(50.0), avg(50)
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
index cb2d56b..7e161d1 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
@@ -54,11 +54,11 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
- outputColumnNames: cint, cdecimal1, cdecimal2
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count()
- keys: cint (type: int)
+ aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), count(_col2), max(_col2), min(_col2), sum(_col2), count()
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
@@ -159,11 +159,11 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
- outputColumnNames: cint, cdecimal1, cdecimal2
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
- keys: cint (type: int)
+ aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), avg(_col1), stddev_pop(_col1), stddev_samp(_col1), count(_col2), max(_col2), min(_col2), sum(_col2), avg(_col2), stddev_pop(_col2), stddev_samp(_col2), count()
+ keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
index 23f2b98..322270f 100644
--- a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
@@ -129,10 +129,10 @@ STAGE PLANS:
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), t (type: tinyint)
- outputColumnNames: s, t
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: s (type: string), t (type: tinyint)
+ keys: _col0 (type: string), _col1 (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_elt.q.out b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
index 818e266..49d1458 100644
--- a/ql/src/test/results/clientpositive/spark/vector_elt.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
@@ -21,7 +21,7 @@ STAGE PLANS:
predicate: (ctinyint > 0) (type: boolean)
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ((ctinyint % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((ctinyint % 2) + 1), cstring1, cint) (type: string)
+ expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Limit
diff --git a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
index 6f62820..3d6a236 100644
--- a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
@@ -129,11 +129,11 @@ STAGE PLANS:
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), t (type: tinyint), b (type: bigint)
- outputColumnNames: s, t, b
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(b)
- keys: s (type: string), t (type: tinyint)
+ aggregations: max(_col2)
+ keys: _col0 (type: string), _col1 (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
index 02c1fc6..8cf1a81 100644
--- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
@@ -30,23 +30,11 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
+ Edges:
+ Reducer 4 <- Map 3 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: li
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean)
- Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 l_partkey (type: int)
- Local Work:
- Map Reduce Local Work
- Map 4
+ Map 2
Map Operator Tree:
TableScan
alias: lineitem
@@ -65,18 +53,11 @@ STAGE PLANS:
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col1 (type: int)
+ 0 _col0 (type: int)
1 _col0 (type: int)
Local Work:
Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: lineitem
@@ -84,17 +65,21 @@ STAGE PLANS:
Filter Operator
predicate: l_partkey is not null (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
+ Select Operator
+ expressions: l_partkey (type: int)
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reducer 4
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
@@ -103,38 +88,60 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
+ Spark HashTable Sink Operator
keys:
- 0 _col0 (type: int)
- 1 l_partkey (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 3
- Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col3
- input vertices:
- 1 Map 4
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col3 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
Stage: Stage-0
Fetch Operator
@@ -181,60 +188,80 @@ where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-3
Spark
#### A masked pattern was here ####
Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: li
- Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((l_partkey is not null and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean)
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 l_partkey (type: int)
- Local Work:
- Map Reduce Local Work
- Map 4
+ Map 2
Map Operator Tree:
TableScan
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean)
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), 1 (type: int)
+ expressions: l_orderkey (type: int), l_linenumber (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col1 (type: int), _col4 (type: int)
+ 0 _col0 (type: int), _col3 (type: int)
1 _col0 (type: int), _col1 (type: int)
Local Work:
Map Reduce Local Work
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col3 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 14 Data size: 1714 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 4 <- Map 3 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: lineitem
@@ -242,17 +269,21 @@ STAGE PLANS:
Filter Operator
predicate: l_partkey is not null (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
+ Select Operator
+ expressions: l_partkey (type: int)
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Reducer 4
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
@@ -265,33 +296,23 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 l_partkey (type: int)
- outputColumnNames: _col0, _col1, _col3, _col4
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
input vertices:
- 1 Map 3
+ 0 Map 1
Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col1 (type: int), _col4 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
- outputColumnNames: _col0, _col3
- input vertices:
- 1 Map 4
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col3 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
index 3363c8b..6bbb6b9 100644
--- a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
@@ -126,11 +126,11 @@ STAGE PLANS:
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: bo (type: boolean), b (type: bigint)
- outputColumnNames: bo, b
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(b)
- keys: bo (type: boolean)
+ aggregations: max(_col1)
+ keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
index 9ec8538..b78aec9 100644
--- a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
@@ -300,11 +300,11 @@ STAGE PLANS:
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: dt (type: date)
- outputColumnNames: dt
+ expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: concat(concat(concat('Quarter ', UDFToString(UDFToInteger((((month(dt) - 1) / 3) + 1)))), '-'), UDFToString(year(dt))) (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index f61ef7d..b2dd09d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -129,10 +129,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
- outputColumnNames: ctinyint
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(ctinyint)
+ aggregations: sum(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -435,10 +435,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint)
- outputColumnNames: cbigint
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(cbigint)
+ aggregations: sum(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -741,10 +741,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cfloat (type: float)
- outputColumnNames: cfloat
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(cfloat)
+ aggregations: sum(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -990,14 +990,14 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((cstring2 like '%b%') or ((79.553 <> cint) or (cbigint < cdouble))) (type: boolean)
+ predicate: ((cstring2 like '%b%') or ((79.553 <> UDFToDouble(cint)) or (UDFToDouble(cbigint) < cdouble))) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint)
- outputColumnNames: cbigint, cfloat, ctinyint
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
+ aggregations: avg(_col0), stddev_pop(_col0), var_samp(_col0), count(), sum(_col1), min(_col2)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
@@ -1014,7 +1014,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), (- _col0) (type: double), (-6432 + _col0) (type: double), _col1 (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) + (-6432 + _col0)) (type: double), _col2 (type: double), (- (-6432 + _col0)) (type: double), (-6432 + (- (-6432 + _col0))) (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) / (- (-6432 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
+ expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
index 41ee583..be00d16 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
@@ -82,15 +82,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((ctimestamp1 > 11) and ((ctimestamp2 <> 12) and (ctinyint < 9763215.5639)))) (type: boolean)
+ predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((UDFToDouble(ctimestamp1) > 11.0) and ((UDFToDouble(ctimestamp2) <> 12.0) and (UDFToDouble(ctinyint) < 9763215.5639)))) (type: boolean)
Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
- outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
- keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
+ aggregations: max(_col1), sum(_col3), stddev_pop(_col3), stddev_pop(_col1), max(_col3), min(_col1)
+ keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
@@ -110,7 +110,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * _col3) (type: double), _col7 (type: double), _col8 (type: double), (((- _col1) + _col5) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * ((- _col1) + _col5)) / _col1) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * ((- _col1) + _col5)) (type: double)
+ expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * UDFToDouble(_col3)) (type: double), _col7 (type: double), _col8 (type: double), (UDFToDouble(((- _col1) + _col5)) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double)
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -336,15 +336,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((ctimestamp1 > -1.388) and ((ctimestamp2 <> -1.3359999999999999) and (ctinyint < 9763215.5639)))) (type: boolean)
+ predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((UDFToDouble(ctimestamp1) > -1.388) and ((UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (UDFToDouble(ctinyint) < 9763215.5639)))) (type: boolean)
Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
- outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
- keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
+ aggregations: max(_col1), sum(_col3), stddev_pop(_col3), stddev_pop(_col1), max(_col3), min(_col1)
+ keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
@@ -364,7 +364,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * _col3) (type: double), _col7 (type: double), _col8 (type: double), (((- _col1) + _col5) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * ((- _col1) + _col5)) / _col1) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * ((- _col1) + _col5)) (type: double)
+ expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * UDFToDouble(_col3)) (type: double), _col7 (type: double), _col8 (type: double), (UDFToDouble(((- _col1) + _col5)) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double)
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
index 1de639b..cbfa70e 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
@@ -82,15 +82,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((ctinyint <= cbigint) and ((cint <= cdouble) or (ctimestamp2 < ctimestamp1))) and (cdouble < ctinyint)) and ((cbigint > -257) or (cfloat < cint))) (type: boolean)
+ predicate: ((((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) and (cdouble < UDFToDouble(ctinyint))) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean)
Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double)
- outputColumnNames: ctimestamp1, cfloat, cstring1, cboolean1, cdouble
+ expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_samp((- ((- 26.28) + cdouble))), max(cfloat), stddev_pop(cfloat), count(cfloat), var_pop(cfloat), var_samp(cfloat)
- keys: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double)
+ aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1)
+ keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
@@ -110,7 +110,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (_col1 - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (_col1 - _col4)) (type: double)
+ expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
index 960a058..a58d941 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
@@ -78,15 +78,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((cstring2 like '%ss%') or (cstring1 like '10%')) or ((cint >= -75) and ((ctinyint = csmallint) and (cdouble >= -3728)))) (type: boolean)
+ predicate: ((cstring2 like '%ss%') or ((cstring1 like '10%') or ((cint >= -75) and ((UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))))) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
- outputColumnNames: cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint)
- keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
+ aggregations: stddev_samp(_col0), min(_col2), stddev_samp(_col4), var_pop(_col4), var_samp(_col5), stddev_pop(_col5)
+ keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -106,7 +106,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - _col5) (type: double), _col8 (type: double), (_col2 * 79.553) (type: double), (33 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (_col5 - _col0) (type: float), (-23 % _col4) (type: int), (- (-26.28 - _col5)) (type: double), _col12 (type: double)
+ expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - UDFToDouble(_col5)) (type: double), _col8 (type: double), (_col2 * 79.553) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - UDFToDouble(_col5))) (type: double), _col12 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index 473eaf4..a42c30a 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -63,11 +63,11 @@ STAGE PLANS:
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
- outputColumnNames: cstring1, cdouble, ctimestamp1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
- keys: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
+ aggregations: count(_col1), stddev_samp(_col1), min(_col1)
+ keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
@@ -87,7 +87,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * _col3) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (_col3 / -1.389) (type: double), _col4 (type: double)
+ expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (UDFToDouble(_col3) / -1.389) (type: double), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index 17b35fe..8bbeb51 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -59,11 +59,11 @@ STAGE PLANS:
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
- outputColumnNames: cstring1, cdouble, ctimestamp1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
- keys: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
+ aggregations: count(_col1), stddev_samp(_col1), min(_col1)
+ keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
@@ -83,7 +83,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * _col3) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (_col3 / -1.389) (type: double), _col4 (type: double)
+ expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (UDFToDouble(_col3) / -1.389) (type: double), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
index 30d116f..5887839 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
@@ -168,7 +168,7 @@ STAGE PLANS:
predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean)
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: (cbigint - 988888) (type: bigint), (cdouble / (cbigint - 988888)) (type: double), (1.2 / (cbigint - 988888)) (type: double)
+ expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / UDFToDouble((cbigint - 988888))) (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -340,10 +340,10 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((cdouble >= -500) and (cdouble < -199)) (type: boolean)
+ predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean)
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: (cdouble + 200.0) (type: double), (cbigint / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
+ expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col4, _col5
Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out b/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
index f92e823..f0852ea 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
@@ -67,7 +67,7 @@ STAGE PLANS:
alias: alltypesorc_part
Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: (cdouble + 2) (type: double)
+ expressions: (cdouble + 2.0) (type: double)
outputColumnNames: _col0
Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
index 6e784cd..eb8914b 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
@@ -18,17 +18,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: (cbigint < cdouble) (type: boolean)
+ filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (cbigint < cdouble) (type: boolean)
+ predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint)
- outputColumnNames: cbigint
+ outputColumnNames: _col0
Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(cbigint)
+ aggregations: avg(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index c2035f2..657f1b5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -148,14 +148,14 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((762 = cbigint) or ((csmallint < cfloat) and ((ctimestamp2 > -5) and (cdouble <> cint)))) or (cstring1 = 'a')) or ((cbigint <= -1.389) and ((cstring2 <> 'a') and ((79.553 <> cint) and (cboolean2 <> cboolean1))))) (type: boolean)
+ predicate: ((762 = cbigint) or (((UDFToFloat(csmallint) < cfloat) and ((UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint)))) or ((cstring1 = 'a') or ((UDFToDouble(cbigint) <= -1.389) and ((cstring2 <> 'a') and ((79.553 <> UDFToDouble(cint)) and (cboolean2 <> cboolean1))))))) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
- outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
+ aggregations: avg(_col0), sum(_col1), stddev_pop(_col0), stddev_samp(_col2), var_samp(_col0), avg(_col3), stddev_samp(_col0), min(_col4), count(_col2)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
@@ -172,7 +172,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), (_col0 + -3728) (type: double), (- (_col0 + -3728)) (type: double), (- (- (_col0 + -3728))) (type: double), ((- (- (_col0 + -3728))) * (_col0 + -3728)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728))) * (_col0 + -3728)) * (- (- (_col0 + -3728)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728)))) (type: double), ((_col2 - (- (- (_col0 + -3728)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563) (type: double), _col6 (type: double), (- ((- _col2) / -563)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (_col7 / ((- _col2) / -563)) (type: double), (- (_col0 / _col1)) (type: double)
+ expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -360,14 +360,14 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((cbigint <= 197) and (cint < cbigint)) or ((cdouble >= -26.28) and (csmallint > cdouble))) or ((ctinyint > cfloat) and cstring1 regexp '.*ss.*')) or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean)
+ predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or (((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or (((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))))) (type: boolean)
Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint)
- outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint)
+ aggregations: max(_col0), var_pop(_col1), stddev_pop(_col2), max(_col3), avg(_col4), min(_col0), min(_col3), stddev_samp(_col2), var_samp(_col0)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -384,7 +384,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), (_col0 / -3728) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), ((_col0 * -3728) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (_col0 / -3728)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((_col0 / -3728) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
+ expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -563,14 +563,14 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((ctimestamp1 = ctimestamp2) or (762.0 = cfloat)) or (cstring1 = 'ss')) or ((csmallint <= cbigint) and (1 = cboolean2))) or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a')))) (type: boolean)
+ predicate: ((ctimestamp1 = ctimestamp2) or ((762.0 = cfloat) or ((cstring1 = 'ss') or (((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a'))))))) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double)
- outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint)
+ aggregations: var_pop(_col0), count(), max(_col1), stddev_pop(_col2), max(_col3), stddev_samp(_col4), count(_col1), avg(_col1)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -587,7 +587,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (_col1 % 79.553) (type: double), _col2 (type: tinyint), (_col1 - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (_col2 + (762 * (- _col1))) (type: bigint), ((- _col0) + _col4) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (_col2 + (762 * (- _col1)))) (type: bigint)
+ expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (UDFToDouble(_col1) % 79.553) (type: double), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -745,14 +745,14 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((ctimestamp2 <= ctimestamp1) and ((cbigint <> cdouble) and ('ss' <= cstring1))) or ((csmallint < ctinyint) and (ctimestamp1 >= 0))) or (cfloat = 17.0)) (type: boolean)
+ predicate: (((ctimestamp2 <= ctimestamp1) and ((UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1))) or (((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17.0))) (type: boolean)
Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float)
- outputColumnNames: ctinyint, cbigint, cint, cfloat
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
+ aggregations: avg(_col0), max(_col1), stddev_samp(_col2), var_pop(_col2), var_pop(_col1), max(_col3)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -769,7 +769,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), (_col0 + 6981) (type: double), ((_col0 + 6981) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981) + _col0) / _col0) (type: double), (- (_col0 + 6981)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), ((- _col1) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double)
+ expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -935,10 +935,10 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and ((csmallint < 79.553) and (-257 <> ctinyint)))) or ((cdouble > ctinyint) and (cfloat >= cint))) or ((cint < cbigint) and (ctinyint > cbigint))) (type: boolean)
+ predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or (((1 <> cboolean2) and ((UDFToDouble(csmallint) < 79.553) and (-257 <> UDFToInteger(ctinyint)))) or (((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))))) (type: boolean)
Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - cint) (type: double), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (cint / cint) (type: double), ((-863.257 - cint) - -26.28) (type: double), (- cfloat) (type: float), (cdouble * -89010) (type: double), (ctinyint / 988888) (type: double), (- ctinyint) (type: tinyint), (79.553 / ctinyint) (type: double)
+ expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - UDFToDouble(cint)) (type: double), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - UDFToDouble(cint)) - -26.28) (type: double), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / UDFToDouble(ctinyint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -1194,10 +1194,10 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((197.0 > ctinyint) and (cint = cbigint)) or (cbigint = 359)) or (cboolean1 < 0)) or ((cstring1 like '%ss') and (cfloat <= ctinyint))) (type: boolean)
+ predicate: (((197.0 > UDFToDouble(ctinyint)) and (UDFToLong(cint) = cbigint)) or ((cbigint = 359) or ((cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))))) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cint (type: int), cbigint (type: bigint), (cint / cbigint) (type: double), (cbigint % 79.553) (type: double), (- (cint / cbigint)) (type: double), (10.175 % cfloat) (type: double), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432) (type: float), (cdouble * csmallint) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), cstring1 (type: string), (cfloat - (cint / cbigint)) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359 - cdouble) (type: double), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean)
+ expressions: cint (type: int), cbigint (type: bigint), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (UDFToDouble(cbigint) % 79.553) (type: double), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), cstring1 (type: string), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -1402,10 +1402,10 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((((csmallint > -26.28) and (cstring2 like 'ss')) or ((cdouble <= cbigint) and ((cstring1 >= 'ss') and (cint <> cdouble)))) or (ctinyint = -89010)) or ((cbigint <= cfloat) and (-26.28 <= csmallint))) (type: boolean)
+ predicate: (((UDFToDouble(csmallint) > -26.28) and (cstring2 like 'ss')) or (((cdouble <= UDFToDouble(cbigint)) and ((cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble))) or ((UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= UDFToDouble(csmallint)))))) (type: boolean)
Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + csmallint) (type: int), (cbigint - ctinyint) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - ctinyint) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), ((cint + csmallint) * (- cbigint)) (type: bigint), ((- cdouble) + cbigint) (type: double), (-1.389 / ctinyint) (type: double), (cbigint % cdouble) (type: double), (- csmallint) (type: smallint), (csmallint + (cint + csmallint)) (type: int)
+ expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / UDFToDouble(ctinyint)) (type: double), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -1668,10 +1668,10 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((-1.389 >= cint) and ((csmallint < ctinyint) and (-6432 > csmallint))) or ((cdouble >= cfloat) and (cstring2 <= 'a'))) or ((cstring1 like 'ss%') and (10.175 > cbigint))) (type: boolean)
+ predicate: (((-1.389 >= UDFToDouble(cint)) and ((csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint)))) or (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > UDFToDouble(cbigint))))) (type: boolean)
Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), (cdouble * 10.175) (type: double), ((-6432 * cfloat) / cfloat) (type: double), (- cfloat) (type: float), (cint % csmallint) (type: int), (cdouble * (- cdouble)) (type: double), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (cbigint / 3569) (type: double), (-257 - csmallint) (type: int), (-6432 * cfloat) (type: float), (- cdouble) (type: double)
+ expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (cdouble * (- cdouble)) (type: double), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double)
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col15, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
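Note on the four hunks above: the plan printer now renders every implicit numeric promotion explicitly, so an int literal next to a double column becomes -89010.0 and a narrower column is wrapped in UDFToDouble/UDFToFloat/UDFToLong. A minimal sketch of the equivalence, using the alltypesorc table named in the TableScans above:

    SELECT cdouble * -89010,                  -- printed as (cdouble * -89010.0)
           cdouble * CAST(-89010 AS DOUBLE)   -- same double expression, cast spelled out
    FROM alltypesorc
    LIMIT 5;

Both projections are the identical double-typed expression; only the rendering in the plan changed.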
@@ -1876,15 +1876,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((csmallint >= -257) and ((-6432 = csmallint) or ((cint >= cdouble) and (ctinyint <= cint)))) (type: boolean)
+ predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean)
Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint)
- outputColumnNames: csmallint, cbigint, ctinyint
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count()
- keys: csmallint (type: smallint)
+ aggregations: stddev_samp(_col0), sum(_col1), var_pop(_col2), count()
+ keys: _col0 (type: smallint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
@@ -1904,7 +1904,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: smallint), (_col0 % -75) (type: int), _col1 (type: double), (-1.389 / _col0) (type: double), _col2 (type: bigint), ((_col0 % -75) / _col2) (type: double), (- (_col0 % -75)) (type: int), _col3 (type: double), (- (- (_col0 % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
+ expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / UDFToDouble(_col0)) (type: double), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
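The two hunks above show the companion change: the Select Operator feeding a Group By now emits internal names (_col0, _col1, ...) instead of raw column names, and the aggregation list is rewritten to match. Read off the visible Filter, Group By, and post-aggregation Select, the underlying query has roughly this shape (a sketch reconstructed from the operators, not the verbatim .q text, and with the derived output expressions abbreviated):

    SELECT csmallint, stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count(*)
    FROM alltypesorc
    WHERE csmallint >= -257
      AND (-6432 = csmallint OR (cint >= cdouble AND ctinyint <= cint))
    GROUP BY csmallint;

The rename is purely internal bookkeeping: csmallint becomes _col0 between the Select and the Group By, which is why the aggregation calls change without any change to the computed result.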
@@ -2084,15 +2084,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((cdouble > 2563.58) and (((cbigint >= cint) and ((csmallint < cint) and (cfloat < -5638.15))) or ((cdouble <= cbigint) and (-5638.15 > cbigint)))) (type: boolean)
+ predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and ((UDFToInteger(csmallint) < cint) and (UDFToDouble(cfloat) < -5638.15))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > UDFToDouble(cbigint))))) (type: boolean)
Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdouble (type: double), cfloat (type: float)
- outputColumnNames: cdouble, cfloat
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble)
- keys: cdouble (type: double)
+ aggregations: var_samp(_col0), count(_col1), sum(_col1), var_pop(_col0), stddev_pop(_col0), sum(_col0)
+ keys: _col0 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
@@ -2112,7 +2112,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double)
+ expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double)
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -2339,15 +2339,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ctimestamp1 <> 0) and (((((((-257 <> ctinyint) and cboolean2 is not null) and (cstring1 regexp '.*ss' and (-3 < ctimestamp1))) or (ctimestamp2 = -5)) or ((ctimestamp1 < 0) and (cstring2 like '%b%'))) or (cdouble = cint)) or (cboolean1 is null and (cfloat < cint)))) (type: boolean)
+ predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and (cboolean2 is not null and (cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))))) or ((UDFToDouble(ctimestamp2) = -5.0) or (((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or ((cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint)))))))) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double)
- outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint)
- keys: ctimestamp1 (type: timestamp), cstring1 (type: string)
+ aggregations: stddev_pop(_col2), avg(_col3), count(), min(_col4), var_samp(_col3), var_pop(_col5), avg(_col2), var_samp(_col5), avg(_col5), min(_col6), var_pop(_col3), stddev_pop(_col4), sum(_col2)
+ keys: _col0 (type: timestamp), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -2367,7 +2367,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: timestamp), _col1 (type: string), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * (- _col4)) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) (type: double), (- (- _col2)) (type: double), ((- _col4) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col2 (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) - (((-26.28 - _col2) * (- _col2)) * (- _col4))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) - (((-26.28 - _col2) * (- _col2)) * (- _col4))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (_col2 * 10.175) (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / (- _col5)) (type: double), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) / _col7) (type: double), (- (- _col4)) (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) % -26.28) (type: double), (- _col2) (type: double), _col3 (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint)
+ expressions: _col0 (type: timestamp), _col1 (type: string), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col2 (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (_col2 * 10.175) (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / UDFToDouble((- _col5))) (type: double), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double), (- _col2) (type: double), _col3 (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint)
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col38, _col4, _col5, _col7, _col8, _col9
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -2672,15 +2672,15 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (cboolean1 is not null and (((((cdouble < csmallint) and ((cboolean2 = cboolean1) and (cbigint <= -863.257))) or ((cint >= -257) and (cstring1 is not null and (cboolean1 >= 1)))) or cstring2 regexp 'b') or ((csmallint >= ctinyint) and ctimestamp2 is null))) (type: boolean)
+ predicate: (cboolean1 is not null and (((cdouble < UDFToDouble(csmallint)) and ((cboolean2 = cboolean1) and (UDFToDouble(cbigint) <= -863.257))) or (((cint >= -257) and (cstring1 is not null and (cboolean1 >= 1))) or (cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null))))) (type: boolean)
Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint)
- outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint)
- keys: cboolean1 (type: boolean)
+ aggregations: max(_col1), sum(_col2), var_samp(_col3), avg(_col4), min(_col2), var_pop(_col2), sum(_col3), stddev_samp(_col5), stddev_pop(_col6), avg(_col3)
+ keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
@@ -2700,7 +2700,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 2389 Data size: 73341 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: boolean), _col1 (type: float), ((_col2 - 10.175) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: double), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * _col5) (type: double), (- _col1) (type: float), (_col7 - (-1.389 * _col5)) (type: double), _col9 (type: double), (- (_col7 - (-1.389 * _col5))) (type: double), _col10 (type: double), (- _col10) (type: double), (_col10 * _col7) (type: double), (-26.28 / _col1) (type: double), _col2 (type: bigint), (_col2 - 10.175) (type: double), _col3 (type: double), (_col3 % _col1) (type: double), (10.175 + (- _col1)) (type: double), _col4 (type: double)
+ expressions: _col0 (type: boolean), _col1 (type: float), ((UDFToDouble(_col2) - 10.175) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + UDFToDouble((- _col1)))) (type: double), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * UDFToDouble(_col5)) (type: double), (- _col1) (type: float), (UDFToDouble(_col7) - (-1.389 * UDFToDouble(_col5))) (type: double), _col9 (type: double), (- (UDFToDouble(_col7) - (-1.389 * UDFToDouble(_col5)))) (type: double), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (UDFToDouble(_col2) - 10.175) (type: double), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + UDFToDouble((- _col1))) (type: double), _col4 (type: double)
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 2389 Data size: 73341 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -2915,10 +2915,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: i (type: int)
- outputColumnNames: i
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: count(i)
+ aggregations: count(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -3093,10 +3093,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
- outputColumnNames: ctinyint
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(ctinyint)
+ aggregations: count(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -3160,10 +3160,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
- outputColumnNames: cint
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cint)
+ aggregations: count(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -3227,10 +3227,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cfloat (type: float)
- outputColumnNames: cfloat
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cfloat)
+ aggregations: count(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -3294,10 +3294,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string)
- outputColumnNames: cstring1
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cstring1)
+ aggregations: count(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -3361,10 +3361,10 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean)
- outputColumnNames: cboolean1
+ outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cboolean1)
+ aggregations: count(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
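The last six hunks in this file all follow the same pattern: a bare single-column aggregate, e.g.

    SELECT count(ctinyint) FROM alltypesorc;

now runs through an explicit identity Select Operator, so the Group By consumes _col0 and the plan prints count(_col0) instead of count(ctinyint). No rows or results change; only the operator bookkeeping does.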
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index daf6ad3..0e1e9d3 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -46,7 +46,7 @@ STAGE PLANS:
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((csmallint = 418) or (csmallint = 12205)) or (csmallint = 10583)) (type: boolean)
+ predicate: ((csmallint = 418) or ((csmallint = 12205) or (csmallint = 10583))) (type: boolean)
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
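For reference, the query behind this plan can be read straight off the Filter and Select operators above (reconstructed, so take it as a sketch rather than the verbatim vectorized_case.q text):

    SELECT csmallint,
           CASE WHEN csmallint = 418 THEN 'a'
                WHEN csmallint = 12205 THEN 'b'
                ELSE 'c' END,
           CASE csmallint WHEN 418 THEN 'a'
                          WHEN 12205 THEN 'b'
                          ELSE 'c' END
    FROM alltypesorc
    WHERE csmallint = 418 OR csmallint = 12205 OR csmallint = 10583;

The hunk only re-associates the ORs in the printed predicate from left-deep to right-deep; OR is associative, so the selected rows are unchanged.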
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
index 78b0b37..b6c2b35 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
@@ -23,15 +23,19 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: t2
+ alias: t1
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: cint is not null (type: boolean)
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 cint (type: int)
- 1 cint (type: int)
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
Local Work:
Map Reduce Local Work
@@ -49,25 +53,33 @@ STAGE PLANS:
Filter Operator
predicate: cint is not null (type: boolean)
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 cint (type: int)
- 1 cint (type: int)
- outputColumnNames: _col2, _col17
- input vertices:
- 1 Map 3
- Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17))
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
Local Work:
Map Reduce Local Work
Execution mode: vectorized
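The t1/t2 aliases and the aggregate list make the underlying query recoverable: in the old plan, _col2 and _col17 were the two sides' cint columns, which the new identity Selects rename to _col0/_col1 before the join. A sketch of that query, assuming (from the 12288-row statistics) a self-join of alltypesorc:

    SELECT count(t1.cint), max(t2.cint), min(t1.cint), avg(t1.cint + t2.cint)
    FROM alltypesorc t1
    JOIN alltypesorc t2 ON t1.cint = t2.cint;

The new Select under Map 3 also narrows the hash-table build side to just the join key, which is why the Spark HashTable Sink now keys on _col0.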
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
index b84a4c3..881641f 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
@@ -31,25 +31,33 @@ STAGE PLANS:
Filter Operator
predicate: cint is not null (type: boolean)
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: cint (type: int)
- sort order: +
- Map-reduce partition columns: cint (type: int)
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
- alias: t2
+ alias: t1
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: cint is not null (type: boolean)
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: cint (type: int)
- sort order: +
- Map-reduce partition columns: cint (type: int)
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -57,19 +65,23 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 cint (type: int)
- 1 cint (type: int)
- outputColumnNames: _col2, _col17
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17))
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
Reducer 3
Reduce Operator Tree:
Group By Operator
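This file exercises the same join and aggregates as vectorized_mapjoin.q.out, but through a reduce-side (shuffle) join, and it picks up the same _col0 key projection on both map sides plus the pre-aggregation Select computing (_col0 + _col1). The shuffle path is presumably forced by disabling map-join conversion; the setting below is an assumption, since the .q file's set statements are not visible in this hunk:

    -- assumption: how the shuffle join is forced in the test
    set hive.auto.convert.join=false;
    SELECT count(t1.cint), max(t2.cint), min(t1.cint), avg(t1.cint + t2.cint)
    FROM alltypesorc t1
    JOIN alltypesorc t2 ON t1.cint = t2.cint;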
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
index ed1ba4b..290db0d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
@@ -644,10 +644,10 @@ STAGE PLANS:
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp)
- outputColumnNames: ctimestamp1
+ outputColumnNames: _col0
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count()
+ aggregations: min(_col0), max(_col0), count(_col0), count()
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
@@ -725,10 +725,10 @@ STAGE PLANS:
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp)
- outputColumnNames: ctimestamp1
+ outputColumnNames: _col0
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(ctimestamp1)
+ aggregations: sum(_col0)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -815,10 +815,10 @@ STAGE PLANS:
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp)
- outputColumnNames: ctimestamp1
+ outputColumnNames: _col0
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+ aggregations: avg(_col0), variance(_col0), var_pop(_col0), var_samp(_col0), std(_col0), stddev(_col0), stddev_pop(_col0), stddev_samp(_col0)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
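The three hunks above are again the identity-Select rename, here over timestamp aggregates. Their query shapes, read off the Group By operators (the 40-row source table's name does not appear in these hunks, so ts_src below is a stand-in):

    SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count(*) FROM ts_src;
    SELECT sum(ctimestamp1) FROM ts_src;
    SELECT avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1),
           var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1),
           stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
    FROM ts_src;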
diff --git a/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out b/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out
index c1f3de7..f6bfad2 100644
--- a/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out
+++ b/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out
@@ -69,6 +69,7 @@ select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@lineitem
#### A masked pattern was here ####
@@ -77,6 +78,7 @@ select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@lineitem
#### A masked pattern was here ####