diff --git a/data/conf/spark/standalone/hive-site.xml b/data/conf/spark/standalone/hive-site.xml
index 016f568..abc73f8 100644
--- a/data/conf/spark/standalone/hive-site.xml
+++ b/data/conf/spark/standalone/hive-site.xml
@@ -230,4 +230,10 @@
     <value>hive_admin_user</value>
   </property>
 
+<property>
+  <name>hive.in.test</name>
+  <value>true</value>
+  <description>Internal marker for test. Used for masking env-dependent values</description>
+</property>
+
 </configuration>
diff --git a/data/conf/spark/yarn-client/hive-site.xml b/data/conf/spark/yarn-client/hive-site.xml
index 39ba20e..b7211ee 100644
--- a/data/conf/spark/yarn-client/hive-site.xml
+++ b/data/conf/spark/yarn-client/hive-site.xml
@@ -250,4 +250,10 @@
     <value>hive_admin_user</value>
   </property>
 
+<property>
+  <name>hive.in.test</name>
+  <value>true</value>
+  <description>Internal marker for test. Used for masking env-dependent values</description>
+</property>
+
 </configuration>
diff --git a/ql/src/test/queries/clientpositive/cbo_subq_in.q b/ql/src/test/queries/clientpositive/cbo_subq_in.q
index 7a0bfba..0a25b9c 100644
--- a/ql/src/test/queries/clientpositive/cbo_subq_in.q
+++ b/ql/src/test/queries/clientpositive/cbo_subq_in.q
@@ -29,6 +29,7 @@ select p.p_partkey, li.l_suppkey
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
 ;
 
 -- where and having
diff --git a/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q b/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q
index b7e1bf1..0a1f137 100644
--- a/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q
+++ b/ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q
@@ -7,12 +7,12 @@ CREATE TABLE DEST2(key MAP<STRING, STRING>, value BIGINT) STORED AS TEXTFILE;
 
 EXPLAIN
 FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10;
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10;
 
 FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10;
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10;
 
 SELECT DEST1.* FROM DEST1;
 SELECT DEST2.* FROM DEST2;
diff --git a/ql/src/test/results/clientpositive/cbo_subq_in.q.out b/ql/src/test/results/clientpositive/cbo_subq_in.q.out
index c1f3de7..f6bfad2 100644
--- a/ql/src/test/results/clientpositive/cbo_subq_in.q.out
+++ b/ql/src/test/results/clientpositive/cbo_subq_in.q.out
@@ -69,6 +69,7 @@ select p.p_partkey, li.l_suppkey
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
 PREHOOK: type: QUERY
 PREHOOK: Input: default@lineitem
 #### A masked pattern was here ####
@@ -77,6 +78,7 @@ select p.p_partkey, li.l_suppkey
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+ order by p.p_partkey
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@lineitem
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out
index 4611f60..0564056 100644
--- a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out
@@ -20,13 +20,13 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@DEST2
 PREHOOK: query: EXPLAIN
 FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN
 FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -84,29 +84,27 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Limit
-                  Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
-             sort order: 
-             Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-             value expressions: _col0 (type: array<string>), _col1 (type: bigint)
+             key expressions: _col0 (type: array<string>)
+             sort order: +
+             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+             value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: array<string>), VALUE._col1 (type: bigint)
+          expressions: KEY.reducesinkkey0 (type: array<string>), VALUE._col0 (type: bigint)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 10
             Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -149,29 +147,27 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Limit
-                  Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-6
     Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
-             sort order: 
-             Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-             value expressions: _col0 (type: map<string,string>), _col1 (type: bigint)
+             key expressions: _col0 (type: map<string,string>)
+             sort order: +
+             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+             value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Select Operator
-         expressions: VALUE._col0 (type: map<string,string>), VALUE._col1 (type: bigint)
+         expressions: KEY.reducesinkkey0 (type: map<string,string>), VALUE._col0 (type: bigint)
          outputColumnNames: _col0, _col1
-         Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+         Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          Limit
            Number of rows: 10
            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -198,15 +194,15 @@ STAGE PLANS:
       Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@dest1
 PREHOOK: Output: default@dest2
 POSTHOOK: query: FROM SRC
-INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
index 032926d..8955a61 100644
--- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
@@ -173,12 +173,16 @@ STAGE PLANS:
                   Filter Operator
                     predicate: deptid is not null (type: boolean)
                     Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: deptid (type: int)
-                      sort order: +
-                      Map-reduce partition columns: deptid (type: int)
+                    Select Operator
+                      expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: lastname (type: string), locid (type: int)
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string), _col2 (type: int)
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -187,33 +191,33 @@ STAGE PLANS:
                   Filter Operator
                     predicate: deptid is not null (type: boolean)
                     Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: deptid (type: int)
-                      sort order: +
-                      Map-reduce partition columns: deptid (type: int)
+                    Select Operator
+                      expressions: deptid (type: int), deptname (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: deptname (type: string)
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
         Reducer 2 
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                 0 deptid (type: int)
-                 1 deptid (type: int)
-               outputColumnNames: _col0, _col1, _col2, _col6, _col7
+                 0 _col1 (type: int)
+                 1 _col0 (type: int)
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
-               Select Operator
-                 expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
-                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+               File Output Operator
+                 compressed: false
                  Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
-                   table:
-                       input format: org.apache.hadoop.mapred.TextInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 table:
+                     input format: org.apache.hadoop.mapred.TextInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -248,12 +252,16 @@ STAGE PLANS:
                   Filter Operator
                     predicate: (deptid is not null and lastname is not null) (type: boolean)
                     Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: deptid (type: int), lastname (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: deptid (type: int), lastname (type: string)
+                    Select Operator
+                      expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: locid (type: int)
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int), _col0 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col1 (type: int), _col0 (type: string)
+                        Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: int)
        Map 3 
            Map Operator Tree:
                TableScan
@@ -262,35 +270,32 @@ STAGE PLANS:
                  Filter Operator
                    predicate: (deptid is not null and deptname is not null) (type: boolean)
                    Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
-                   Reduce Output Operator
-                     key expressions: deptid (type: int), deptname (type: string)
-                     sort order: ++
-                     Map-reduce partition columns: deptid (type: int), deptname (type: string)
+                   Select Operator
+                     expressions: deptid (type: int), deptname (type: string)
+                     outputColumnNames: _col0, _col1
                      Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       key expressions: _col0 (type: int), _col1 (type: string)
+                       sort order: ++
+                       Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                       Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 2 
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                 0 deptid (type: int), lastname (type: string)
-                 1 deptid (type: int), deptname (type: string)
-               outputColumnNames: _col0, _col1, _col2, _col6, _col7
+                 0 _col1 (type: int), _col0 (type: string)
+                 1 _col0 (type: int), _col1 (type: string)
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4
               Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
-              Filter Operator
-                predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean)
-                Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -321,12 +326,16 @@ STAGE PLANS:
                   Filter Operator
                     predicate: (deptid is not null and lastname is not null) (type: boolean)
                     Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: deptid (type: int), lastname (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: deptid (type: int), lastname (type: string)
+                    Select Operator
+                      expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: locid (type: int)
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int), _col0 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col1 (type: int), _col0 (type: string)
+                        Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: int)
        Map 3 
            Map Operator Tree:
                TableScan
@@ -335,32 +344,32 @@ STAGE PLANS:
                  Filter Operator
                    predicate: (deptid is not null and deptname is not null) (type: boolean)
                    Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
-                   Reduce Output Operator
-                     key expressions: deptid (type: int), deptname (type: string)
-                     sort order: ++
-                     Map-reduce partition columns: deptid (type: int), deptname (type: string)
+                   Select Operator
+                     expressions: deptid (type: int), deptname (type: string)
+                     outputColumnNames: _col0, _col1
                      Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       key expressions: _col0 (type: int), _col1 (type: string)
+                       sort order: ++
+                       Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                       Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 2 
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                 0 deptid (type: int), lastname (type: string)
-                 1 deptid (type: int), deptname (type: string)
-               outputColumnNames: _col0, _col1, _col2, _col6, _col7
+                 0 _col1 (type: int), _col0 (type: string)
+                 1 _col0 (type: int), _col1 (type: string)
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4
               Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+              File Output Operator
+                compressed: false
                 Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -395,12 +404,16 @@ STAGE PLANS:
                   Filter Operator
                     predicate: (deptid is not null and lastname is not null) (type: boolean)
                     Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: deptid (type: int), lastname (type: string), lastname (type: string)
-                      sort order: +++
-                      Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string)
+                    Select Operator
+                      expressions: lastname (type: string), deptid (type: int), locid (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: locid (type: int)
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int), _col0 (type: string), _col0 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col0 (type: string)
+                        Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: int)
        Map 3 
            Map Operator Tree:
                TableScan
@@ -409,35 +422,32 @@ STAGE PLANS:
                  Filter Operator
                    predicate: (deptid is not null and deptname is not null) (type: boolean)
                    Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
-                   Reduce Output Operator
-                     key expressions: deptid (type: int), deptname (type: string), deptname (type: string)
-                     sort order: +++
-                     Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string)
+                   Select Operator
+                     expressions: deptid (type: int), deptname (type: string)
+                     outputColumnNames: _col0, _col1
                      Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       key expressions: _col0 (type: int), _col1 (type: string), _col1 (type: string)
+                       sort order: +++
+                       Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col1 (type: string)
+                       Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 2 
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                 0 deptid (type: int), lastname (type: string), lastname (type: string)
-                 1 deptid (type: int), deptname (type: string), deptname (type: string)
-               outputColumnNames: _col0, _col1, _col2, _col6, _col7
+                 0 _col1 (type: int), _col0 (type: string), _col0 (type: string)
+                 1 _col0 (type: int), _col1 (type: string), _col1 (type: string)
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4
               Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE
-              Filter Operator
-                predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean)
-                Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/auto_join1.q.out b/ql/src/test/results/clientpositive/spark/auto_join1.q.out
index d9215f8..d26a33e 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join1.q.out
@@ -32,15 +32,19 @@ STAGE PLANS:
        Map 2 
            Map Operator Tree:
                TableScan
-                 alias: src2
+                 alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                   Spark HashTable Sink Operator
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
+                   Select Operator
+                     expressions: key (type: string)
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                     Spark HashTable Sink Operator
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
            Local Work:
              Map Reduce Local Work
 
@@ -56,28 +60,32 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                   Map Join Operator
-                     condition map:
-                          Inner Join 0 to 1
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
-                     outputColumnNames: _col0, _col6
-                     input vertices:
-                       1 Map 2
-                     Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                     Select Operator
-                       expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
-                       outputColumnNames: _col0, _col1
+                   Select Operator
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                     Map Join Operator
+                       condition map:
+                            Inner Join 0 to 1
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
+                       outputColumnNames: _col1, _col2
+                       input vertices:
+                         1 Map 2
                        Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                       File Output Operator
-                         compressed: false
+                       Select Operator
+                         expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
+                         outputColumnNames: _col0, _col1
                          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                         table:
-                             input format: org.apache.hadoop.mapred.TextInputFormat
-                             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                             name: default.dest_j1
+                         File Output Operator
+                           compressed: false
+                           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                           table:
+                               input format: org.apache.hadoop.mapred.TextInputFormat
+                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                               name: default.dest_j1
            Local Work:
              Map Reduce Local Work
 
@@ -105,7 +113,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join10.q.out b/ql/src/test/results/clientpositive/spark/auto_join10.q.out
index cd50576..b43e55c 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join10.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join10.q.out
@@ -33,8 +33,8 @@ STAGE PLANS:
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                     expressions: key (type: string), value (type: string)
-                     outputColumnNames: _col0, _col1
+                     expressions: key (type: string)
+                     outputColumnNames: _col0
                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        keys:
@@ -58,8 +58,8 @@ STAGE PLANS:
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                     expressions: key (type: string)
-                     outputColumnNames: _col0
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -67,19 +67,23 @@ STAGE PLANS:
                        keys:
                          0 _col0 (type: string)
                          1 _col0 (type: string)
-                       outputColumnNames: _col2, _col3
+                       outputColumnNames: _col0, _col1
                        input vertices:
                          1 Map 3
                        Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                       Group By Operator
-                         aggregations: sum(hash(_col2,_col3))
-                         mode: hash
+                       Select Operator
+                         expressions: hash(_col0,_col1) (type: int)
                          outputColumnNames: _col0
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         Reduce Output Operator
-                           sort order: 
+                         Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                         Group By Operator
+                           aggregations: sum(_col0)
+                           mode: hash
+                           outputColumnNames: _col0
                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                           value expressions: _col0 (type: bigint)
+                           Reduce Output Operator
+                             sort order: 
+                             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                             value expressions: _col0 (type: bigint)
            Local Work:
              Map Reduce Local Work
        Reducer 2 
diff --git a/ql/src/test/results/clientpositive/spark/auto_join11.q.out b/ql/src/test/results/clientpositive/spark/auto_join11.q.out
index ed32dec..f8fc309 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join11.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join11.q.out
@@ -30,11 +30,11 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: (key < 100) (type: boolean)
+                   predicate: (UDFToDouble(key) < 100.0) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                     expressions: key (type: string), value (type: string)
-                     outputColumnNames: _col0, _col1
+                     expressions: key (type: string)
+                     outputColumnNames: _col0
                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        keys:
@@ -55,11 +55,11 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: (key < 100) (type: boolean)
+                   predicate: (UDFToDouble(key) < 100.0) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                     expressions: key (type: string)
-                     outputColumnNames: _col0
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
@@ -67,19 +67,23 @@ STAGE PLANS:
                        keys:
                          0 _col0 (type: string)
                          1 _col0 (type: string)
-                       outputColumnNames: _col0, _col3
+                       outputColumnNames: _col1, _col2
                        input vertices:
                          1 Map 3
                        Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
-                       Group By Operator
-                         aggregations: sum(hash(_col0,_col3))
-                         mode: hash
+                       Select Operator
+                         expressions: hash(_col2,_col1) (type: int)
                          outputColumnNames: _col0
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         Reduce Output Operator
-                           sort order: 
+                         Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+                         Group By Operator
+                           aggregations: sum(_col0)
+                           mode: hash
+                           outputColumnNames: _col0
                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                           value expressions: _col0 (type: bigint)
+                           Reduce Output Operator
+                             sort order: 
+                             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                             value expressions: _col0 (type: bigint)
            Local Work:
              Map Reduce Local Work
        Reducer 2 
diff --git a/ql/src/test/results/clientpositive/spark/auto_join14.q.out b/ql/src/test/results/clientpositive/spark/auto_join14.q.out
index 830314e..30be2e8 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join14.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join14.q.out
@@ -29,18 +29,22 @@ STAGE PLANS:
    Spark
#### A masked pattern was here ####
      Vertices:
-       Map 1 
+       Map 2 
            Map Operator Tree:
                TableScan
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: (key > 100) (type: boolean)
+                   predicate: (UDFToDouble(key) > 100.0) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                   Spark HashTable Sink Operator
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
+                   Select Operator
+                     expressions: key (type: string)
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                     Spark HashTable Sink Operator
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
            Local Work:
              Map Reduce Local Work
 
@@ -48,36 +52,40 @@ STAGE PLANS:
    Spark
#### A masked pattern was here ####
      Vertices:
-       Map 2 
+       Map 1 
            Map Operator Tree:
                TableScan
                  alias: srcpart
                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((key > 100) and key is not null) (type: boolean)
+                   predicate: ((UDFToDouble(key) > 100.0) and key is not null) (type: boolean)
                    Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE
-                   Map Join Operator
-                     condition map:
-                          Inner Join 0 to 1
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
-                     outputColumnNames: _col0, _col6
-                     input vertices:
-                       0 Map 1
-                     Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
-                     Select Operator
-                       expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
-                       outputColumnNames: _col0, _col1
+                   Select Operator
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE
+                     Map Join Operator
+                       condition map:
+                            Inner Join 0 to 1
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
+                       outputColumnNames: _col1, _col3
+                       input vertices:
+                         1 Map 2
                        Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
-                       File Output Operator
-                         compressed: false
+                       Select Operator
+                         expressions: UDFToInteger(_col3) (type: int), _col1 (type: string)
+                         outputColumnNames: _col0, _col1
                          Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
-                         table:
-                             input format: org.apache.hadoop.mapred.TextInputFormat
-                             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                             name: default.dest1
+                         File Output Operator
+                           compressed: false
+                           Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE
+                           table:
+                               input format: org.apache.hadoop.mapred.TextInputFormat
+                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                               name: default.dest1
            Local Work:
              Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/spark/auto_join16.q.out b/ql/src/test/results/clientpositive/spark/auto_join16.q.out
index 8c166b0..5c4bbb3 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join16.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join16.q.out
@@ -24,61 +24,69 @@ STAGE PLANS:
    Spark
#### A masked pattern was here ####
      Vertices:
-       Map 1 
+       Map 3 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((((key > 10) and value is not null) and (key > 20)) and (value < 200)) (type: boolean)
-                   Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                   predicate: (((((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) > 20.0)) and key is not null) and value is not null) (type: boolean)
+                   Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        keys:
                          0 _col0 (type: string), _col1 (type: string)
-                         1 key (type: string), value (type: string)
+                         1 _col0 (type: string), _col1 (type: string)
            Local Work:
              Map Reduce Local Work
 
   Stage: Stage-1
    Spark
      Edges:
-       Reducer 3 <- Map 2 (GROUP, 1)
+       Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
-       Map 2 
+       Map 1 
            Map Operator Tree:
                TableScan
-                 alias: tab
+                 alias: a
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean)
-                   Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE
-                   Map Join Operator
-                     condition map:
-                          Inner Join 0 to 1
-                     keys:
-                       0 _col0 (type: string), _col1 (type: string)
-                       1 key (type: string), value (type: string)
-                     outputColumnNames: _col0, _col3
-                     input vertices:
-                       0 Map 1
-                     Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: sum(hash(_col0,_col3))
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order: 
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         value expressions: _col0 (type: bigint)
+                   predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(value) < 200.0)) and key is not null) and value is not null) (type: boolean)
+                   Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                   Select Operator
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                     Map Join Operator
+                       condition map:
+                            Inner Join 0 to 1
+                       keys:
+                         0 _col0 (type: string), _col1 (type: string)
+                         1 _col0 (type: string), _col1 (type: string)
+                       outputColumnNames: _col0, _col3
+                       input vertices:
+                         1 Map 3
+                       Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+                       Select Operator
+                         expressions: hash(_col0,_col3) (type: int)
+                         outputColumnNames: _col0
+                         Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+                         Group By Operator
+                           aggregations: sum(_col0)
+                           mode: hash
+                           outputColumnNames: _col0
+                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                           Reduce Output Operator
+                             sort order: 
+                             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                             value expressions: _col0 (type: bigint)
            Local Work:
              Map Reduce Local Work
-       Reducer 3 
+       Reducer 2 
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/spark/auto_join17.q.out b/ql/src/test/results/clientpositive/spark/auto_join17.q.out
index 3144db6..bc492c9 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join17.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join17.q.out
@@ -28,15 +28,19 @@ STAGE PLANS:
        Map 2 
            Map Operator Tree:
                TableScan
-                 alias: src2
+                 alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                   Spark HashTable Sink Operator
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
+                   Select Operator
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                     Spark HashTable Sink Operator
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
            Local Work:
              Map Reduce Local Work
 
@@ -52,28 +56,32 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                   Map Join Operator
-                     condition map:
-                          Inner Join 0 to 1
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
-                     outputColumnNames: _col0, _col1, _col5, _col6
-                     input vertices:
-                       1 Map 2
-                     Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                     Select Operator
-                       expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string)
+                   Select Operator
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                     Map Join Operator
+                       condition map:
+                            Inner Join 0 to 1
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
                        outputColumnNames: _col0, _col1, _col2, _col3
+                       input vertices:
+                         1 Map 2
                        Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                       File Output Operator
-                         compressed: false
+                       Select Operator
+                         expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string)
+                         outputColumnNames: _col0, _col1, _col2, _col3
                          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                         table:
-                             input format: org.apache.hadoop.mapred.TextInputFormat
-                             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                             name: default.dest1
+                         File Output Operator
+                           compressed: false
+                           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                           table:
+                               input format: org.apache.hadoop.mapred.TextInputFormat
+                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                               name: default.dest1
            Local Work:
              Map Reduce Local Work
 
@@ -101,9 +109,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
 POSTHOOK: Lineage: dest1.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT sum(hash(dest1.key1,dest1.value1,dest1.key2,dest1.value2)) FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join18.q.out b/ql/src/test/results/clientpositive/spark/auto_join18.q.out
index f5a2227..eaef06c 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join18.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join18.q.out
@@ -45,11 +45,11 @@ STAGE PLANS:
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: key (type: string), value (type: string)
-                   outputColumnNames: key, value
+                   outputColumnNames: _col0, _col1
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                     aggregations: count(value)
-                     keys: key (type: string)
+                     aggregations: count(_col1)
+                     keys: _col0 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -66,11 +66,11 @@ STAGE PLANS:
                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: key (type: string), value (type: string)
-                   outputColumnNames: key, value
+                   outputColumnNames: _col0, _col1
                    Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                     aggregations: count(DISTINCT value)
-                     keys: key (type: string), value (type: string)
+                     aggregations: count(DISTINCT _col1)
+                     keys: _col0 (type: string), _col1 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -103,15 +103,19 @@ STAGE PLANS:
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 aggregations: sum(hash(_col0,_col1,_col2,_col3))
-                 mode: hash
+               Select Operator
+                 expressions: hash(_col0,_col1,_col2,_col3) (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   sort order: 
+                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                 Group By Operator
+                   aggregations: sum(_col0)
+                   mode: hash
+                   outputColumnNames: _col0
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                   value expressions: _col0 (type: bigint)
+                   Reduce Output Operator
+                     sort order: 
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     value expressions: _col0 (type: bigint)
        Reducer 4 
            Reduce Operator Tree:
              Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out
index 07c7aa5..df737c2 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out
@@ -47,11 +47,11 @@ STAGE PLANS:
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: key (type: string), value (type: string)
-                   outputColumnNames: key, value
+                   outputColumnNames: _col0, _col1
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                     aggregations: count(value)
-                     keys: key (type: string)
+                     aggregations: count(_col1)
+                     keys: _col0 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -68,11 +68,11 @@ STAGE PLANS:
                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: key (type: string), value (type: string)
-                   outputColumnNames: key, value
+                   outputColumnNames: _col0, _col1
                    Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                     aggregations: count(DISTINCT value), count(DISTINCT key)
-                     keys: key (type: string), value (type: string)
+                     aggregations: count(DISTINCT _col1), count(DISTINCT _col0)
+                     keys: _col0 (type: string), _col1 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3
                      Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -105,15 +105,19 @@ STAGE PLANS:
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4))
-                 mode: hash
+               Select Operator
+                 expressions: hash(_col0,_col1,_col2,_col3,_col4) (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   sort order: 
+                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                 Group By Operator
+                   aggregations: sum(_col0)
+                   mode: hash
+                   outputColumnNames: _col0
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                   value expressions: _col0 (type: bigint)
+                   Reduce Output Operator
+                     sort order: 
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     value expressions: _col0 (type: bigint)
        Reducer 4 
            Reduce Operator Tree:
              Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/auto_join19.q.out b/ql/src/test/results/clientpositive/spark/auto_join19.q.out
index f2b0140..9e4fb8f 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join19.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join19.q.out
@@ -35,10 +35,14 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                   Spark HashTable Sink Operator
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
+                   Select Operator
+                     expressions: key (type: string), value (type: string)
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                     Spark HashTable Sink Operator
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
            Local Work:
              Map Reduce Local Work
 
@@ -54,28 +58,32 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                   Map Join Operator
-                     condition map:
-                          Inner Join 0 to 1
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
-                     outputColumnNames: _col0, _col8
-                     input vertices:
-                       1 Map 2
-                     Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                     Select Operator
-                       expressions: UDFToInteger(_col0) (type: int), _col8 (type: string)
-                       outputColumnNames: _col0, _col1
+                   Select Operator
+                     expressions: key (type: string)
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                     Map Join Operator
+                       condition map:
+                            Inner Join 0 to 1
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
+                       outputColumnNames: _col0, _col4
+                       input vertices:
+                         1 Map 2
                        Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                       File Output Operator
-                         compressed: false
+                       Select Operator
+                         expressions: UDFToInteger(_col0) (type: int), _col4 (type: string)
+                         outputColumnNames: _col0, _col1
                          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                         table:
-                             input format: org.apache.hadoop.mapred.TextInputFormat
-                             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                             name: default.dest1
+                         File Output Operator
+                           compressed: false
+                           Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                           table:
+                               input format: org.apache.hadoop.mapred.TextInputFormat
+                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                               name: default.dest1
            Local Work:
              Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/spark/auto_join26.q.out b/ql/src/test/results/clientpositive/spark/auto_join26.q.out
index 58821e9..3c437a1 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join26.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join26.q.out
@@ -29,7 +29,7 @@ STAGE PLANS:
    Spark
#### A masked pattern was here ####
      Vertices:
-       Map 1 
+       Map 3 
            Map Operator Tree:
                TableScan
                  alias: x
@@ -37,20 +37,24 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                   Spark HashTable Sink Operator
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
+                   Select Operator
+                     expressions: key (type: string)
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                     Spark HashTable Sink Operator
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
            Local Work:
              Map Reduce Local Work
 
   Stage: Stage-1
    Spark
      Edges:
-       Reducer 3 <- Map 2 (GROUP, 2)
+       Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
      Vertices:
-       Map 2 
+       Map 1 
            Map Operator Tree:
                TableScan
                  alias: y
@@ -58,31 +62,39 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                   Map Join Operator
-                     condition map:
-                          Inner Join 0 to 1
-                     keys:
-                       0 key (type: string)
-                       1 key (type: string)
+                   Select Operator
+                     expressions: key (type: string)
                      outputColumnNames: _col0
-                     input vertices:
-                       0 Map 1
-                     Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count(1)
-                       keys: _col0 (type: string)
-                       mode: hash
-                       outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                     Map Join Operator
+                       condition map:
+                            Inner Join 0 to 1
+                       keys:
+                         0 _col0 (type: string)
+                         1 _col0 (type: string)
+                       outputColumnNames: _col1
+                       input vertices:
+                         1 Map 3
                        Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         key expressions: _col0 (type: string)
-                         sort order: +
-                         Map-reduce partition columns: _col0 (type: string)
+                       Select Operator
+                         expressions: _col1 (type: string)
+                         outputColumnNames: _col0
                          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                         value expressions: _col1 (type: bigint)
+                         Group By Operator
+                           aggregations: count(1)
+                           keys: _col0 (type: string)
+                           mode: hash
+                           outputColumnNames: _col0, _col1
+                           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                           Reduce Output Operator
+                             key expressions: _col0 (type: string)
+                             sort order: +
+                             Map-reduce partition columns: _col0 (type: string)
+                             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                             value expressions: _col1 (type: bigint)
            Local Work:
              Map Reduce Local Work
-       Reducer 3 
+       Reducer 2 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -128,7 +140,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 POSTHOOK: Output: default@dest_j1
-POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ]
+POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ]
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/auto_join27.q.out b/ql/src/test/results/clientpositive/spark/auto_join27.q.out
index 1722837..43313e0 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join27.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join27.q.out
@@ -41,7 +41,7 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: (key < 200) (type: boolean)
+                   predicate: (UDFToDouble(key) < 200.0) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string)
@@ -58,25 +58,29 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: (key < 200) (type: boolean)
+                   predicate: (UDFToDouble(key) < 200.0) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                   Group By Operator
-                     keys: key (type: string), value (type: string)
-                     mode: hash
+                   Select Operator
+                     expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                     Reduce Output Operator
-                       key expressions: _col0 (type: string), _col1 (type: string)
-                       sort order: ++
-                       Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                     Group By Operator
+                       keys: _col0 (type: string), _col1 (type: string)
+                       mode: hash
+                       outputColumnNames: _col0, _col1
                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                       Reduce Output Operator
+                         key expressions: _col0 (type: string), _col1 (type: string)
+                         sort order: ++
+                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                         Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
        Map 6 
            Map Operator Tree:
                TableScan
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: (key < 200) (type: boolean)
+                   predicate: (UDFToDouble(key) < 200.0) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/auto_join4.q.out b/ql/src/test/results/clientpositive/spark/auto_join4.q.out
index 3366f75..7cf582c 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join4.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join4.q.out
@@ -50,15 +50,15 @@ STAGE PLANS:
        Map 2 
            Map Operator Tree:
                TableScan
-                 alias: src2
+                 alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((key > 15) and (key < 25)) (type: boolean)
-                   Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                   predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean)
+                   Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        keys:
                          0 _col0 (type: string)
@@ -76,7 +76,7 @@ STAGE PLANS:
                  alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((key > 10) and (key < 20)) (type: boolean)
+                   predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
@@ -154,8 +154,8 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join5.q.out b/ql/src/test/results/clientpositive/spark/auto_join5.q.out
index b6d8798..08e9ae6 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join5.q.out
@@ -47,18 +47,18 @@ STAGE PLANS:
    Spark
#### A masked pattern was here ####
      Vertices:
-       Map 1 
+       Map 2 
            Map Operator Tree:
                TableScan
                  alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((key > 10) and (key < 20)) (type: boolean)
-                   Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                   predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                   Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        keys:
                          0 _col0 (type: string)
@@ -70,13 +70,13 @@ STAGE PLANS:
    Spark
#### A masked pattern was here ####
      Vertices:
-       Map 2 
+       Map 1 
            Map Operator Tree:
                TableScan
-                 alias: src2
+                 alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((key > 15) and (key < 25)) (type: boolean)
+                   predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
@@ -84,16 +84,16 @@ STAGE PLANS:
                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
-                            Right Outer Join0 to 1
+                            Left Outer Join0 to 1
                        keys:
                          0 _col0 (type: string)
                          1 _col0 (type: string)
                        outputColumnNames: _col0, _col1, _col2, _col3
                        input vertices:
-                         0 Map 1
+                         1 Map 2
                        Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
-                         expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string)
+                         expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string)
                          outputColumnNames: _col0, _col1, _col2, _col3
                          Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
                          File Output Operator
@@ -154,8 +154,8 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
diff --git a/ql/src/test/results/clientpositive/spark/auto_join6.q.out b/ql/src/test/results/clientpositive/spark/auto_join6.q.out
index 2ca2674..5b7e904 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join6.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join6.q.out
@@ -54,7 +54,7 @@ STAGE PLANS:
                  alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((key > 10) and (key < 20)) (type: boolean)
+                   predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
@@ -69,10 +69,10 @@ STAGE PLANS:
        Map 3 
            Map Operator Tree:
                TableScan
-                 alias: src2
+                 alias: src1
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                   predicate: ((key > 15) and (key < 25)) (type: boolean)
+                   predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
@@ -154,8 +154,8 @@ POSTHOOK: Input: default@src
 POSTHOOK:
Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/spark/auto_join8.q.out b/ql/src/test/results/clientpositive/spark/auto_join8.q.out index e0df9a2..e77817a 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join8.q.out @@ -50,15 +50,15 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -76,7 +76,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -96,7 +96,7 @@ STAGE PLANS: predicate: _col2 is null (type: boolean) Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), null (type: int), _col3 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -157,8 +157,8 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION 
[(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/spark/auto_join9.q.out b/ql/src/test/results/clientpositive/spark/auto_join9.q.out index 6daf348..568891b 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -33,10 +33,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -52,28 +56,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col8 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 + input vertices: + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out b/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out index 8934433..b6b89c1 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out @@ -44,7 +44,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 3080335 -Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join 
JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -330,7 +330,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 3078400 -Warning: Shuffle Join JOIN[10][tables = [a, b]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git a/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out index 1f37c75..4b809ce 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -24,7 +24,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 -Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -34,7 +34,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 -Warning: Map Join MAPJOIN[11][bigTable=b] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index c3d5225..72f60d0 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -24,33 +24,41 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out b/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out index c1f3de7..f6bfad2 100644 --- a/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out +++ b/ql/src/test/results/clientpositive/spark/cbo_subq_in.q.out @@ -69,6 +69,7 @@ select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) + order by p.p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem #### A masked pattern was here #### @@ -77,6 +78,7 @@ select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) + order by p.p_partkey POSTHOOK: type: QUERY POSTHOOK: Input: 
default@lineitem #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out index 602de6e..665bfce 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out @@ -23,39 +23,45 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 100) (type: boolean) + predicate: (UDFToDouble(key) = 100.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '100' (type: string) - sort order: + - Map-reduce partition columns: '100' (type: string) + Select Operator + expressions: value (type: string) + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '100' (type: string) + sort order: + + Map-reduce partition columns: '100' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 100) (type: boolean) + predicate: (UDFToDouble(key) = 100.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '100' (type: string) - sort order: + - Map-reduce partition columns: '100' (type: string) + Select Operator Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: '100' (type: string) + sort order: + + Map-reduce partition columns: '100' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '100' (type: string), 101.0 (type: double), _col6 (type: string) + expressions: '100' (type: string), 101.0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -115,53 +121,57 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_orderkey is not null and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: l_orderkey (type: int), l_linenumber (type: int) - sort order: ++ - Map-reduce partition columns: l_orderkey (type: int), l_linenumber (type: int) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - value expressions: l_partkey (type: int), l_suppkey (type: int) + predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2999 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Map 3 Map Operator Tree: TableScan - alias: lineitem + alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE + predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), 1 (type: int) + expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 l_orderkey (type: int), l_linenumber (type: int) + 0 _col0 (type: int), _col3 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 13 Data size: 1582 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1582 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 1582 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out index cb9eda5..b2e9ffb 100644 --- a/ql/src/test/results/clientpositive/spark/count.q.out +++ b/ql/src/test/results/clientpositive/spark/count.q.out @@ -53,11 +53,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), b (type: 
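In the constprog_partitioner plan above, the correlated IN subquery now compiles to a Left Semi Join whose keys carry both the order key and the correlated line number, with the subquery side de-duplicated by a hash Group By. A minimal hand-written equivalent of that shape, assuming the same lineitem table (a sketch of what the optimizer derives, not the test query itself):

    -- Explicit left semi join mirroring the plan: the right side is the
    -- filtered subquery, and both l_orderkey and the correlated
    -- l_linenumber participate in the join keys.
    SELECT li.l_partkey, li.l_suppkey
    FROM lineitem li
    LEFT SEMI JOIN
      (SELECT l_orderkey, l_linenumber FROM lineitem
       WHERE l_shipmode = 'AIR') o
    ON li.l_orderkey = o.l_orderkey
       AND li.l_linenumber = o.l_linenumber
    WHERE li.l_linenumber = 1;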
int), c (type: int), d (type: int) - outputColumnNames: a, b, c, d + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) - keys: a (type: int), b (type: int), c (type: int) + aggregations: count(DISTINCT _col1), count(DISTINCT _col2), sum(_col3) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE @@ -188,14 +188,14 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: a, b, c, d + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: a (type: int), b (type: int), c (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: a (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE - value expressions: d (type: int) + value expressions: _col3 (type: int) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/cross_join.q.out b/ql/src/test/results/clientpositive/spark/cross_join.q.out index bb25e21..2fb6b21 100644 --- a/ql/src/test/results/clientpositive/spark/cross_join.q.out +++ b/ql/src/test/results/clientpositive/spark/cross_join.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: -- current explain select src.key from src join src src2 PREHOOK: type: QUERY @@ -21,18 +21,24 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Join Operator @@ -57,7 +63,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: -- ansi cross join explain select src.key from src cross join src src2 PREHOOK: type: QUERY @@ -80,18 +86,24 @@ STAGE PLANS: TableScan alias: src Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Join Operator @@ -141,32 +153,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out b/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out index ff58910..c228f0b 100644 --- a/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out +++ b/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out @@ -28,7 +28,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@B -Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from A join B PREHOOK: type: 
QUERY POSTHOOK: query: explain select * from A join B @@ -49,19 +49,27 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -70,19 +78,15 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out index 6c4e659..ab062ad 100644 --- a/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out +++ b/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out @@ -28,7 +28,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@B -Warning: Map Join MAPJOIN[7][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from A join B PREHOOK: type: QUERY POSTHOOK: query: explain select * from A join B @@ -48,10 +48,14 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Local Work: Map Reduce Local Work @@ -64,19 +68,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/spark/groupby1.q.out b/ql/src/test/results/clientpositive/spark/groupby1.q.out index f661a62..8f60691 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1.q.out @@ -35,15 +35,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: string) + key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: substr(value, 5) (type: string) + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby4.q.out b/ql/src/test/results/clientpositive/spark/groupby4.q.out index 87d2968..b764466 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4.q.out @@ -37,11 +37,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: substr(key, 1, 1) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(key, 1, 1) (type: string) + key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
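The cross-product warnings above change shape because join planning now runs over projected _col* columns: once a Select Operator sits under the join, the original table aliases are no longer visible at warning time, so the message falls back to internal names such as $hdt$_0/$hdt$_1 and bigTable=?. The operator IDs (JOIN[4] to JOIN[7], MAPJOIN[7] to MAPJOIN[11]) appear to shift for the same reason, since the extra Select operators add to the plan's numbering. The trigger itself is unchanged and is reproduced by the test's own query, a join with no join condition:

    -- A and B are the small CTAS tables from cross_product_check;
    -- any join without an ON clause raises the cross-product warning.
    EXPLAIN SELECT * FROM A JOIN B;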
NONE diff --git a/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out index 9fe3b72..65bdf2c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out @@ -20,13 +20,13 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@DEST2 PREHOOK: query: EXPLAIN FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -41,8 +41,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 6 (GROUP, 2) Reducer 4 <- Map 7 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 6 @@ -95,19 +95,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array), _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: array) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: array), VALUE._col1 (type: bigint) + expressions: KEY.reducesinkkey0 (type: array), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -127,19 +125,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: map), _col1 (type: bigint) + Reduce Output Operator 
+ key expressions: _col0 (type: map) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 5 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: map), VALUE._col1 (type: bigint) + expressions: KEY.reducesinkkey0 (type: map), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -179,15 +175,15 @@ STAGE PLANS: Stats-Aggr Operator PREHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 PREHOOK: Output: default@dest2 POSTHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 -INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value) as kvmap, COUNT(1) GROUP BY MAP(SRC.key, SRC.value) ORDER BY kvmap limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 @@ -204,16 +200,16 @@ POSTHOOK: query: SELECT DEST1.* FROM DEST1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -["118"] 2 -["180"] 1 -["201"] 1 -["202"] 1 -["238"] 2 -["273"] 3 -["282"] 2 -["419"] 1 -["432"] 1 -["467"] 1 +["0"] 3 +["10"] 1 +["100"] 2 +["103"] 2 +["104"] 2 +["105"] 1 +["11"] 1 +["111"] 1 +["113"] 2 +["114"] 1 PREHOOK: query: SELECT DEST2.* FROM DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -223,12 +219,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### {"0":"val_0"} 3 -{"138":"val_138"} 4 -{"170":"val_170"} 1 -{"19":"val_19"} 1 -{"222":"val_222"} 1 -{"223":"val_223"} 2 -{"226":"val_226"} 1 -{"489":"val_489"} 4 -{"8":"val_8"} 1 -{"80":"val_80"} 1 +{"10":"val_10"} 1 +{"100":"val_100"} 2 +{"103":"val_103"} 2 +{"104":"val_104"} 2 +{"105":"val_105"} 1 +{"11":"val_11"} 1 +{"111":"val_111"} 1 +{"113":"val_113"} 2 +{"114":"val_114"} 1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out index 42513a4..21c88b1 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out @@ -42,11 +42,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: 
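The golden rows for DEST1 and DEST2 above change from an arbitrary subset to the lexicographically smallest keys because the rewritten queries now sort before applying LIMIT 10; without an ORDER BY, GROUP BY ... LIMIT returns whichever rows a reducer emits first, which is not stable across execution engines. The pattern used by the updated test, where the aliases keyarray and kvmap name the grouping expressions so ORDER BY can reference them:

    -- Deterministic top-N in a multi-insert: order on an alias of the
    -- grouping expression in each branch before the LIMIT.
    FROM src
    INSERT OVERWRITE TABLE dest1
      SELECT ARRAY(src.key) AS keyarray, COUNT(1)
      GROUP BY ARRAY(src.key) ORDER BY keyarray LIMIT 10
    INSERT OVERWRITE TABLE dest2
      SELECT MAP(src.key, src.value) AS kvmap, COUNT(1)
      GROUP BY MAP(src.key, src.value) ORDER BY kvmap LIMIT 10;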
string), val (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -133,11 +133,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -223,11 +223,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), '0' (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -302,11 +302,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -407,11 +407,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), '0' (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out index 16437d5..50d707d 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -436,20 +436,24 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <= 20) (type: boolean) + predicate: (UDFToDouble(key) <= 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: 
NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -548,7 +552,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (GROUP, 2) Reducer 6 <- Map 5 (GROUP, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (SORT, 1) @@ -560,55 +564,57 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), value (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and 
(UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -617,25 +623,21 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: --++ - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: --++ + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 
(type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -646,12 +648,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index 6f4854c..92b6d19 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -42,11 +42,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -127,11 +127,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), '0' (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE @@ -206,11 +206,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string), '0' (type: string) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE @@ -305,11 +305,11 @@ 
STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT val) - keys: key (type: string), '0' (type: string), val (type: string) + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), '0' (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 65a8374..6c0f4a5 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -97,11 +97,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -308,11 +308,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -980,11 +980,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: 1 (type: int), key (type: string) + keys: 1 (type: int), _col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1195,11 +1195,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) + keys: _col0 (type: string), 1 (type: int), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -1421,12 +1421,12 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), (key + 1) (type: double) + keys: _col0 (type: string), 
_col1 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -1685,28 +1685,32 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash + Select Operator + expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1958,11 +1962,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2062,11 +2066,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2330,11 +2334,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2437,12 +2441,12 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: (key + key) (type: double) + keys: _col0 (type: 
double) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -3386,12 +3390,12 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) bucketGroup: true - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -3620,11 +3624,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) + keys: _col0 (type: string), 1 (type: int), _col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -3850,11 +3854,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int) + keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int) mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index 9f30e15..8248a70 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -97,11 +97,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -309,11 +309,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -998,11 +998,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: 1 (type: int), key (type: string) + keys: 1 (type: int), _col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 
Basic stats: COMPLETE Column stats: NONE @@ -1214,11 +1214,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) + keys: _col0 (type: string), 1 (type: int), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -1458,12 +1458,12 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), (key + 1) (type: double) + keys: _col0 (type: string), _col1 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -1740,28 +1740,32 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash + Select Operator + expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: rand() (type: double) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2030,11 +2034,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2134,11 +2138,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator 
aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2403,11 +2407,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2510,12 +2514,12 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: (key + key) (type: double) + keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -3495,12 +3499,12 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) bucketGroup: true - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -3746,11 +3750,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string) + keys: _col0 (type: string), 1 (type: int), _col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -3976,11 +3980,11 @@ STAGE PLANS: GatherStats: false Select Operator expressions: key (type: string), val (type: string) - outputColumnNames: key, val + outputColumnNames: _col0, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int) + keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int) mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/having.q.out b/ql/src/test/results/clientpositive/spark/having.q.out index da6186e..245b27a 100644 --- a/ql/src/test/results/clientpositive/spark/having.q.out +++ b/ql/src/test/results/clientpositive/spark/having.q.out @@ -104,20 +104,24 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <> 302) (type: boolean) + predicate: (UDFToDouble(key) <> 302.0) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - 
aggregations: max(value) - keys: key (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: max(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -478,11 +482,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(value) - keys: key (type: string) + aggregations: max(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -749,20 +753,24 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 300) (type: boolean) + predicate: (UDFToDouble(key) > 300.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: max(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -947,11 +955,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(value) - keys: key (type: string) + aggregations: max(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1215,11 +1223,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(value) - keys: key (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out b/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out index 57942af..c965455 100644 --- a/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out +++ b/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out @@ -25,45 +25,53 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + predicate: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and value is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + predicate: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and value is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: 
_col0, _col2 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col5 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -133,49 +141,57 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + filterExpr: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + predicate: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and value is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan - alias: b - filterExpr: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + alias: a + filterExpr: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + predicate: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and value is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col5 (type: string) + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out index 4da17fb..94adb3d 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out @@ -79,11 +79,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: key (type: string) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -164,6 +164,7 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 +WARNING: Comparing a bigint and a string may result in a loss of precision. 
PREHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out index 0f352d7..d947eb5 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out @@ -27,7 +27,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table @@ -82,7 +82,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table diff --git a/ql/src/test/results/clientpositive/spark/innerjoin.q.out b/ql/src/test/results/clientpositive/spark/innerjoin.q.out index 3f6f9ab..762ddad 100644 --- a/ql/src/test/results/clientpositive/spark/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/innerjoin.q.out @@ -38,37 +38,45 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: 
_col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -104,7 +112,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest_j1.* FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -1214,12 +1222,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -1227,7 +1239,7 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col0 (type: string) - 1 key (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join1.q.out b/ql/src/test/results/clientpositive/spark/join1.q.out index 145bbe4..4714f11 100644 --- a/ql/src/test/results/clientpositive/spark/join1.q.out +++ b/ql/src/test/results/clientpositive/spark/join1.q.out @@ -38,37 +38,45 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: 
key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -104,7 +112,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest_j1.* FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/spark/join10.q.out b/ql/src/test/results/clientpositive/spark/join10.q.out index e7dd570..8d18683 100644 --- a/ql/src/test/results/clientpositive/spark/join10.q.out +++ b/ql/src/test/results/clientpositive/spark/join10.q.out @@ -36,14 +36,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan @@ -53,15 +54,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -70,19 +70,15 @@ STAGE PLANS: keys: 0 _col0 (type: 
string) 1 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join11.q.out b/ql/src/test/results/clientpositive/spark/join11.q.out index 2b5eb3a..7d59d0c 100644 --- a/ql/src/test/results/clientpositive/spark/join11.q.out +++ b/ql/src/test/results/clientpositive/spark/join11.q.out @@ -35,35 +35,35 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -72,10 +72,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join14.q.out 
b/ql/src/test/results/clientpositive/spark/join14.q.out index 336787b..e942087 100644 --- a/ql/src/test/results/clientpositive/spark/join14.q.out +++ b/ql/src/test/results/clientpositive/spark/join14.q.out @@ -35,42 +35,50 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan alias: srcpart Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 100) and key is not null) (type: boolean) + predicate: ((UDFToDouble(key) > 100.0) and key is not null) (type: boolean) Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 100.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + expressions: UDFToInteger(_col3) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join16.q.out b/ql/src/test/results/clientpositive/spark/join16.q.out index 000ea08..aecd953 100644 --- a/ql/src/test/results/clientpositive/spark/join16.q.out +++ b/ql/src/test/results/clientpositive/spark/join16.q.out @@ -19,30 +19,34 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > 10) and value is not null) and (key > 20)) and (value < 200)) (type: boolean) 
- Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(value) < 200.0)) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: tab + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + predicate: (((((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) > 20.0)) and key is not null) and value is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -50,16 +54,16 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) - 1 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 58 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/join17.q.out b/ql/src/test/results/clientpositive/spark/join17.q.out index 4aa2a05..4aefeb3 100644 --- a/ql/src/test/results/clientpositive/spark/join17.q.out +++ b/ql/src/test/results/clientpositive/spark/join17.q.out @@ 
-78,14 +78,18 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -138,21 +142,25 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -201,7 +209,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src2] + /src [src1] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -209,12 +217,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -281,9 +289,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: 
Lineage: dest1.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/spark/join18.q.out b/ql/src/test/results/clientpositive/spark/join18.q.out index 3d266ad..ed17bab 100644 --- a/ql/src/test/results/clientpositive/spark/join18.q.out +++ b/ql/src/test/results/clientpositive/spark/join18.q.out @@ -48,11 +48,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(value) - keys: key (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -69,11 +69,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value) - keys: key (type: string), value (type: string) + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index 5560226..18d79d3 100644 --- a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -50,11 +50,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(value) - keys: key (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -71,11 +71,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value), count(DISTINCT key) - keys: key (type: string), value (type: string) + aggregations: count(DISTINCT _col1), count(DISTINCT _col0) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/join29.q.out 
b/ql/src/test/results/clientpositive/spark/join29.q.out index 0b4284c..9479dd1 100644 --- a/ql/src/test/results/clientpositive/spark/join29.q.out +++ b/ql/src/test/results/clientpositive/spark/join29.q.out @@ -36,10 +36,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: x @@ -47,19 +47,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -77,10 +81,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: y @@ -88,19 +92,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -116,12 +124,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 input vertices: - 0 Reducer 2 + 1 Reducer 4 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) + expressions: _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 
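Reviewer note on join29.q.out: the vertex renumbering (Reducer 2 <- Map 1 becoming Reducer 4 <- Map 3, and the reverse in Stage-1) only reflects the stages being emitted in the opposite order. The substantive change is that each map-side Group By now reads from an explicit Select Operator, so it aggregates over the internal alias _col0 instead of the raw column key, and the map join's broadcast side moved from input 0 to input 1, with the output expressions permuted to compensate. Paraphrasing the operator trees above (a sketch of the query shape, not a quote from join29.q; the destination table name is assumed):

-- Each branch aggregates one scan (x over src1, y over src); the two
-- aggregates are then map-joined on key.
INSERT OVERWRITE TABLE dest_j1  -- destination name assumed
SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (SELECT x.key, count(1) AS cnt FROM src1 x GROUP BY x.key) subq1
JOIN (SELECT y.key, count(1) AS cnt FROM src  y GROUP BY y.key) subq2
  ON subq1.key = subq2.key;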
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join31.q.out b/ql/src/test/results/clientpositive/spark/join31.q.out index a52a8b6..108a1ea 100644 --- a/ql/src/test/results/clientpositive/spark/join31.q.out +++ b/ql/src/test/results/clientpositive/spark/join31.q.out @@ -38,10 +38,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: x @@ -49,45 +49,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: y @@ -95,41 +92,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 - input vertices: - 0 Reducer 2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) @@ -143,7 +141,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -195,7 +193,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ] POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join34.q.out b/ql/src/test/results/clientpositive/spark/join34.q.out index 8a1ba8d..09b8b6b 100644 --- a/ql/src/test/results/clientpositive/spark/join34.q.out +++ b/ql/src/test/results/clientpositive/spark/join34.q.out @@ -158,7 +158,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 20) (type: boolean) + predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -224,12 +224,12 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: x1 + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key > 100) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -291,7 +291,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [x1] + /src [x] Map 4 Map Operator Tree: TableScan @@ -300,16 +300,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value 
expressions: value (type: string) - auto parallelism: false + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -367,7 +371,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 key (type: string) + 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -452,7 +456,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out index 1ed9c90..e84c860 100644 --- a/ql/src/test/results/clientpositive/spark/join35.q.out +++ b/ql/src/test/results/clientpositive/spark/join35.q.out @@ -168,22 +168,26 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 20) (type: boolean) + predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -236,27 +240,31 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: x1 + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key > 100) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -305,7 +313,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [x1] + /src [x] Map 6 Map Operator Tree: TableScan @@ -314,16 +322,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -398,7 +410,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 key (type: string) + 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -500,7 +512,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x1.null, ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x.null, ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git 
a/ql/src/test/results/clientpositive/spark/join4.q.out b/ql/src/test/results/clientpositive/spark/join4.q.out index eddcf07..55b3a18 100644 --- a/ql/src/test/results/clientpositive/spark/join4.q.out +++ b/ql/src/test/results/clientpositive/spark/join4.q.out @@ -58,7 +58,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -73,20 +73,20 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -158,8 +158,8 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/spark/join40.q.out b/ql/src/test/results/clientpositive/spark/join40.q.out index 08695ff..02c11cd 100644 --- a/ql/src/test/results/clientpositive/spark/join40.q.out +++ b/ql/src/test/results/clientpositive/spark/join40.q.out @@ -22,10 +22,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE 
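Reviewer note on join4.q.out (and the join40.q.out hunks that follow): two independent changes are visible. First, comparisons between the string column key and integer literals now print the implicit cast explicitly, so (key > 10) becomes (UDFToDouble(key) > 10.0); the filter's semantics are unchanged. Second, range predicates are now inferred across the equi-join key: the second branch's filter picks up the first branch's bounds, which is why its row estimate drops from 55 to 6. A sketch of the join4-style pattern being exercised (paraphrased, using Hive's FROM-first insert syntax; column aliases follow the lineage entries above):

-- Branch b's effective filter becomes (15 < key < 25) AND (10 < key < 20),
-- inferred through subq1.c1 = subq2.c3. The inference lands on the
-- non-preserved side only, which is safe for this outer join: right
-- rows outside the left branch's range can never match.
FROM (SELECT a.key AS c1, a.value AS c2 FROM src a WHERE a.key > 10 AND a.key < 20) subq1
LEFT OUTER JOIN
     (SELECT b.key AS c3, b.value AS c4 FROM src b WHERE b.key > 15 AND b.key < 25) subq2
  ON subq1.c1 = subq2.c3
INSERT OVERWRITE TABLE dest1
SELECT subq1.c1, subq1.c2, subq2.c3, subq2.c4;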
+ value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <= 100) (type: boolean) + predicate: (UDFToDouble(key) <= 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -37,38 +52,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -677,37 +677,45 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col6 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3762,32 +3770,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) diff --git a/ql/src/test/results/clientpositive/spark/join5.q.out b/ql/src/test/results/clientpositive/spark/join5.q.out index d645e89..1914ad0 100644 --- a/ql/src/test/results/clientpositive/spark/join5.q.out +++ b/ql/src/test/results/clientpositive/spark/join5.q.out @@ -58,7 +58,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -73,33 +73,33 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE Filter Operator - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -158,8 +158,8 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out b/ql/src/test/results/clientpositive/spark/join6.q.out index 4770770..5f2a85a 100644 --- a/ql/src/test/results/clientpositive/spark/join6.q.out +++ b/ql/src/test/results/clientpositive/spark/join6.q.out @@ -58,7 +58,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -73,10 +73,10 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 15) and (key < 25)) (type: boolean) + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) 
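Reviewer note on join5.q.out and join6.q.out: join5's plan does more than rewrite the casts. The condition map flips from Right Outer Join0 to 1 to Left Outer Join0 to 1, i.e. the planner swapped the two inputs (note that Map 1 and Map 3 exchanged their range filters) and compensated by permuting the output expressions to UDFToInteger(_col2), _col3, UDFToInteger(_col0), _col1. That rewrite rests on a standard identity (t1 and t2 below are hypothetical stand-ins for the two filtered subqueries):

-- A right outer join is a left outer join with the inputs swapped:
SELECT t1.k, t1.v, t2.k, t2.v
FROM t1 RIGHT OUTER JOIN t2 ON t1.k = t2.k;
-- returns the same rows as:
SELECT t1.k, t1.v, t2.k, t2.v
FROM t2 LEFT OUTER JOIN t1 ON t1.k = t2.k;

By contrast join6 (a full outer join in the original test) keeps each branch's filter local: no bounds are copied across the join, as expected, since neither side of a full outer join may be filtered by the other side's predicate.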
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -158,8 +158,8 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/spark/join8.q.out b/ql/src/test/results/clientpositive/spark/join8.q.out index d242c10..e3fac82 100644 --- a/ql/src/test/results/clientpositive/spark/join8.q.out +++ b/ql/src/test/results/clientpositive/spark/join8.q.out @@ -58,7 +58,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -73,20 +73,20 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -102,7 +102,7 @@ STAGE PLANS: predicate: _col2 is null (type: boolean) Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), null (type: int), _col3 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -161,8 +161,8 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 
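Reviewer note on join8.q.out: on the rewritten branch the explicit "key is not null" conjunct disappears because it is implied by the inferred range bounds, and the row estimate drops from 28 to 6 accordingly. In the reducer, the constant NULL that stands in for the filtered-out right-side key is now printed as UDFToInteger(null) (type: int) instead of a bare null, making the expression's type explicit; the produced rows are the same. The recurring lineage relabeling across these files ((src)src2 becoming (src)src1) follows from both scans of the self-joined src now carrying the same canonical alias; the column provenance itself is unchanged. Paraphrased shape of the join8 anti-join these plans implement (a sketch, not a quote from join8.q):

-- Keep left rows whose right-side match is filtered away; the right
-- key column in the result is a typed NULL.
FROM (SELECT a.key AS c1, a.value AS c2 FROM src a WHERE a.key > 10 AND a.key < 20) subq1
LEFT OUTER JOIN
     (SELECT b.key AS c3, b.value AS c4 FROM src b WHERE b.key > 15 AND b.key < 25) subq2
  ON subq1.c1 = subq2.c3
INSERT OVERWRITE TABLE dest1
SELECT subq1.c1, subq1.c2, subq2.c3, subq2.c4
WHERE subq2.c3 IS NULL;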
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git a/ql/src/test/results/clientpositive/spark/join9.q.out b/ql/src/test/results/clientpositive/spark/join9.q.out index 92f0237..c7440da 100644 --- a/ql/src/test/results/clientpositive/spark/join9.q.out +++ b/ql/src/test/results/clientpositive/spark/join9.q.out @@ -94,13 +94,17 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -162,14 +166,18 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -226,12 +234,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col8 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index dabdcb8..f34153d 100644 --- 
a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[4][tables = [p1, p2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select p1.p_name, p2.p_name from part p1 , part p2 PREHOOK: type: QUERY @@ -21,19 +21,27 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -42,19 +50,15 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col13 + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out index a18d82e..3b3358f 100644 --- a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out @@ -26,49 +26,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - 
sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -76,9 +64,9 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 0 to 2 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - 2 _col0 (type: string), _col1 (type: string) + 0 key (type: string), hr (type: string) + 1 key (type: string), hr (type: string) + 2 key (type: string), hr (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/spark/join_vc.q.out b/ql/src/test/results/clientpositive/spark/join_vc.q.out index 8b25d4f..27265a6 100644 --- a/ql/src/test/results/clientpositive/spark/join_vc.q.out +++ b/ql/src/test/results/clientpositive/spark/join_vc.q.out @@ -157,39 +157,47 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: BLOCK__OFFSET__INSIDE__FILE (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col7 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: bigint) + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out b/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out index e95d2ab..c6f9039 100644 --- a/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out @@ -555,10 +555,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) - outputColumnNames: hr + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: hr (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 1efa9e7..7101beb 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -195,12 +195,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum((key + 1)) - keys: value (type: string) + aggregations: sum(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -289,12 +289,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key + expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg((key + 1)) - keys: value (type: string) + aggregations: avg(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -384,10 +384,10 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double) - outputColumnNames: cdouble + outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: cdouble (type: double) + keys: _col0 (type: double) mode: hash outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE @@ -564,10 +564,10 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) - outputColumnNames: ctinyint, cdouble + outputColumnNames: _col0, _col1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ctinyint (type: tinyint), cdouble (type: double) + keys: _col0 (type: tinyint), _col1 (type: double) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE @@ -780,11 +780,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string), key (type: string) - outputColumnNames: value, key + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(key) - keys: value (type: string) + aggregations: sum(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -883,10 +883,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 2) - Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -896,11 +896,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -911,18 +911,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 
(type: bigint) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -942,18 +942,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -961,19 +975,23 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 4 + Select Operator + expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Limit + Number of rows: 4 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -982,30 +1000,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string), _col1 (type: bigint) - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -1039,16 +1043,16 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + expressions: value (type: string), key (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: value (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: key (type: string) + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1293,15 +1297,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + expressions: concat(key, value, value, value, value, value, value, value, value, value) (type: string), key (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: concat(key, value, value, value, value, value, value, value, value, value) (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: concat(key, value, value, value, value, value, value, value, value, value) (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out index 3285ad9..4480310 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out +++ 
b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out @@ -72,7 +72,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 20) (type: boolean) + predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '22' (type: string) @@ -92,7 +92,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 20) and (key < 40)) (type: boolean) + predicate: ((UDFToDouble(key) > 20.0) and (UDFToDouble(key) < 40.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '33' (type: string) diff --git a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out index 78cd4bc..84b0be1 100644 --- a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out @@ -504,16 +504,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string), ds (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -714,16 +718,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -785,16 +793,16 @@ STAGE PLANS: 0 {(VALUE._col1 = '2008-04-08')} 1 keys: - 0 key 
(type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col7, _col8 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + predicate: ((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1467,16 +1475,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1583,16 +1595,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1649,16 +1665,16 @@ STAGE PLANS: condition map: Left Outer Join0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col7, _col8 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + predicate: 
((UDFToDouble(_col3) > 15.0) and (UDFToDouble(_col3) < 25.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/merge1.q.out b/ql/src/test/results/clientpositive/spark/merge1.q.out index 41acbdb..e594d47 100644 --- a/ql/src/test/results/clientpositive/spark/merge1.q.out +++ b/ql/src/test/results/clientpositive/spark/merge1.q.out @@ -42,11 +42,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/merge2.q.out b/ql/src/test/results/clientpositive/spark/merge2.q.out index edeebb7..63274eb 100644 --- a/ql/src/test/results/clientpositive/spark/merge2.q.out +++ b/ql/src/test/results/clientpositive/spark/merge2.q.out @@ -42,11 +42,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index 0ccae50..18e1d4e 100644 --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -200,10 +200,10 @@ STAGE PLANS: Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: s, bo, bin, si, i, b + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE @@ -256,10 +256,10 @@ STAGE PLANS: Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), bo (type: boolean), bin (type: binary), si 
(type: smallint), i (type: int), b (type: bigint) - outputColumnNames: s, bo, bin, si, i, b + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE @@ -448,10 +448,10 @@ STAGE PLANS: Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ts (type: timestamp) - outputColumnNames: ts + outputColumnNames: _col0 Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(ts) + aggregations: count(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/order2.q.out b/ql/src/test/results/clientpositive/spark/order2.q.out index 22b1ddc..29eb835 100644 --- a/ql/src/test/results/clientpositive/spark/order2.q.out +++ b/ql/src/test/results/clientpositive/spark/order2.q.out @@ -43,7 +43,7 @@ STAGE PLANS: Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) + predicate: (UDFToDouble(_col0) < 10.0) (type: boolean) Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out index 651c4b4..c3454ee 100644 --- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out +++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out @@ -120,25 +120,32 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -146,11 +153,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -160,51 +169,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -245,12 +232,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 - hr 12 + ds 2008-04-09 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -291,12 +278,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -335,14 +322,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + 
/srcpart/ds=2008-04-09/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -350,13 +360,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -366,26 +374,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] + /src [a] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -393,21 +401,21 @@ STAGE PLANS: condition map: Outer Join 0 to 1 filter mappings: - 1 [0, 1] + 0 [1, 1] filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} + 0 {(VALUE._col1 = '2008-04-08')} + 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) + predicate: ((((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) and (UDFToDouble(_col0) > 15.0)) and (UDFToDouble(_col0) < 25.0)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic 
stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -596,80 +604,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 
1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -765,60 +718,39 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -826,13 +758,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { 
string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -842,43 +772,43 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] + /src [a] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Left Outer Join0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col7 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: (((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean) + predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -921,8 +851,6 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM src a @@ -936,8 +864,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 diff --git a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out index 15171d9..47555b2 100644 --- a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out @@ -38,37 +38,45 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -104,7 +112,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT dest_j1.* FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index 4c22f0b..efadb1d 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -4818,7 +4818,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key = 11) (type: boolean) + predicate: (UDFToDouble(key) = 11.0) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string), hr (type: string) @@ -5047,7 +5047,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key = 11) (type: boolean) + predicate: (UDFToDouble(key) = 11.0) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator 
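
NOTE: many hunks in this patch (load_dyn_part13, louter_join_ppr, order2, pcr) replace predicates like (key = 11) with (UDFToDouble(key) = 11.0). The comparison itself is unchanged: key is a STRING column (see columns.types string:string in the partition metadata above), and when Hive compares a string with an integer literal it coerces both sides to DOUBLE; the newer plans simply print that implicit cast. A minimal illustration, assuming the standard srcpart qtest table:

    -- numeric literal: both sides coerced to DOUBLE,
    -- plan shows predicate: (UDFToDouble(key) = 11.0)
    EXPLAIN SELECT value FROM srcpart WHERE key = 11;
    -- string literal: plain string comparison, no cast in the plan
    EXPLAIN SELECT value FROM srcpart WHERE key = '11';
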
expressions: value (type: string), ds (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index 78db996..31b25b3 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -40,41 +40,56 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 > '20') and (((_col1 < 'val_50') or (_col0 > '2')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -83,27 +98,31 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -315,35 +334,34 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: 
_col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -352,13 +370,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + predicate: ((_col0 > '50') or (_col1 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) + expressions: _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_join.q.out index 441fee1..b97431c 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -37,42 +37,57 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + predicate: (_col0 <> '4') (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '1') and ((_col0 > '20') and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + predicate: ((_col0 > '20') and (((_col1 < 'val_50') or (_col0 > '2')) and (_col0 < '400'))) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '2') and (_col0 <> '4')) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -81,22 +96,26 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -577,17 +596,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key > '2') and (key <> '4')) and (key > '1')) and (key > '20')) and (key < '400')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Map 3 Map Operator Tree: @@ -595,18 +614,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((((key > '1') and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key < '400')) and (key > '2')) and (key <> '4')) and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value 
(type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -615,13 +633,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + predicate: ((_col0 > '50') or (_col2 < '50')) (type: boolean) Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out index 0cbd718..ed536c2 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -127,34 +127,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> 
Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,8 +204,8 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Map 4 + /src [a] + Map 3 Map Operator Tree: TableScan alias: a @@ -214,13 +215,24 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Group By Operator + aggregations: min(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -271,51 +283,26 @@ STAGE PLANS: Truncated Path -> Alias: /src [a] Reducer 2 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false - Reducer 3 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE #### 
A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -331,6 +318,35 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 21 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 21 Data size: 222 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: double), _col2 (type: double) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -501,33 +517,30 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) + tag: 0 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -577,8 +590,8 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Map 4 + /src [a] + Map 3 Map Operator Tree: TableScan alias: a @@ -588,13 +601,24 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Group By Operator + aggregations: min(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -645,43 +669,18 @@ STAGE PLANS: Truncated Path -> Alias: /src [a] Reducer 2 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false - Reducer 3 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -705,6 +704,31 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: double), _col2 (type: double) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -875,33 +899,30 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) 
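The Spark edge rewiring below mirrors the restructuring in the plan above: the min() aggregation now runs in its own branch (Map 3 feeding Reducer 4) while the join moves up to Reducer 2, instead of aggregating in Reducer 2 and joining in Reducer 3. A query of roughly the following shape over the standard src test table would produce this plan; the actual test query lies outside this hunk, so treat this as an illustrative reconstruction:

  -- assumed query shape, not the literal test text
  EXPLAIN
  SELECT a.key, t.c2, t.c3
  FROM src a
  JOIN (SELECT key, min(key) + 2 AS c2, min(key) + 3 AS c3
        FROM src
        GROUP BY key
        HAVING min(key) + 1 < 5) t
  ON a.key = t.key;

The HAVING predicate is what surfaces as the post-aggregation Filter Operator in Reducer 4, and the two derived columns become the double-typed value expressions carried into the join.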
+ Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -951,24 +972,31 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Map 4 + /src [a] + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1019,43 +1047,18 @@ STAGE PLANS: Truncated Path -> Alias: /src [a] Reducer 2 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false - Reducer 3 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1079,6 +1082,35 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 42 Data size: 445 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 42 Data size: 445 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: double), _col2 (type: double) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -1249,33 +1281,30 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) 
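A recurring change throughout these plans is that arithmetic and comparisons on the string key column are now printed with the implicit casts spelled out: (_col1 + 1) becomes (UDFToDouble(_col1) + 1.0), and integer literals are folded to their double forms (10 becomes 10.0). Hive promotes string-to-number arithmetic to double, so the semantics are unchanged; only the rendering is more explicit. A minimal probe that should show the same rendering on a build carrying this patch:

  -- predicate is expected to print as ((UDFToDouble(key) + 1.0) < 5.0)
  EXPLAIN SELECT key FROM src WHERE key + 1 < 5;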
+ tag: 0 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1325,8 +1354,8 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Map 4 + /src [a] + Map 3 Map Operator Tree: TableScan alias: a @@ -1336,13 +1365,24 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Group By Operator + aggregations: min(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1393,43 +1433,18 @@ STAGE PLANS: Truncated Path -> Alias: /src [a] Reducer 2 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false - Reducer 3 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1453,6 +1468,31 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: 
((UDFToDouble(_col1) + 1.0) < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (UDFToDouble(_col1) + 2.0) (type: double), (UDFToDouble(_col1) + 3.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: double), _col2 (type: double) + auto parallelism: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out index 67827ed..987c653 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out @@ -37,52 +37,56 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) - Statistics: Num rows: 1 Data size: 
10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -151,52 +155,56 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col5 > 15) and (_col5 < 
25)) (type: boolean) + predicate: ((UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out index 5d6a469..eaafd7e 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out @@ -37,52 +37,65 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '15') and (key < '25')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '15') and (_col0 < '25')) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '15') and (key < '25')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data 
size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -271,52 +284,53 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '15') and (key < '25')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '15') and (key < '25')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out index 6a0654a..2cb969f 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out @@ -36,47 +36,66 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Filter Operator + predicate: (((((key 
> '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '15') and (_col0 < '25')) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Filter Operator + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE 
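The interesting change in the ppd_outer_join2 and ppd_outer_join3 hunks is the condition map: "Right Outer Join0 to 1" and "Outer Join 0 to 1" both become "Inner Join 0 to 1". Because the WHERE clause applies range predicates to columns from both join inputs, any null-extended row would be filtered out anyway, so the optimizer can soundly rewrite the outer join as an inner join and push every predicate down to both table scans; that is also why the added filters carry "key is not null" conjuncts and why the scan-side row estimates shrink so sharply. Reconstructed from the predicates visible in the plan, the ppd_outer_join3 query has this shape (the exact test text is an assumption):

  EXPLAIN
  SELECT a.key, a.value, b.key, b.value
  FROM src a FULL OUTER JOIN src b ON (a.key = b.key)
  WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25';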
+ File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -264,47 +283,54 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Filter Operator + predicate: (((((key > '10') and (key < '20')) and (key > '15')) and (key < '25')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Filter Operator + predicate: (((((key > '15') and (key < '25')) and (key > '10')) and (key < '20')) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 
_col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out b/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out index 2fb148a..02e24c3 100644 --- a/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out +++ b/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out @@ -97,11 +97,11 @@ STAGE PLANS: Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string) - outputColumnNames: l_shipdate + outputColumnNames: _col0 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(l_shipdate) - keys: l_shipdate (type: string) + aggregations: count(_col0) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE @@ -264,15 +264,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) - outputColumnNames: l_shipdate, _count_of_l_shipdate + outputColumnNames: _col0, _count_of_l_shipdate Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_l_shipdate) - keys: l_shipdate (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE @@ -447,12 +447,12 @@ STAGE PLANS: alias: lineitem_ix Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_shipdate (type: string) - outputColumnNames: l_shipdate + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(l_shipdate) - keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + aggregations: count(_col2) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE @@ -585,15 +585,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) - outputColumnNames: l_shipdate, _count_of_l_shipdate + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate 
(type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _col1, _col2, _count_of_l_shipdate Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_l_shipdate) - keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE @@ -912,15 +912,15 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: null-subquery1:dummy-subquery1:default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) - outputColumnNames: l_shipdate, _count_of_l_shipdate + outputColumnNames: _col0, _count_of_l_shipdate Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_l_shipdate) - keys: l_shipdate (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE @@ -1004,11 +1004,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 1 (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(key) - keys: key (type: int) + aggregations: count(_col0) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1058,15 +1058,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: sum(_count_of_key) - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1168,11 +1168,11 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: sum(_count_of_key) @@ -1226,10 +1226,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: 
NONE @@ -1281,10 +1281,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: value (type: int), key (type: int) - outputColumnNames: value, key + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: value (type: int), key (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1343,10 +1343,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 3 (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1401,10 +1401,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1455,11 +1455,11 @@ STAGE PLANS: alias: tbl Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: key + expressions: key (type: int), substr(key, 2, 3) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), substr(key, 2, 3) (type: string) + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1515,10 +1515,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: value (type: int), key (type: int) - outputColumnNames: value, key + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: value (type: int), key (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1577,10 +1577,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 1 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1632,10 +1632,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) 
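The ql_rewrite_gbtoidx_cbo_1 hunks all show the same two mechanical shifts: the column pruner now inserts a Select Operator that renames source columns to internal _colN names before each Group By, and index-backed scans pick up the $hdt$_0: alias prefix that the CBO path assigns to derived tables. The rewrite itself is unchanged: with an aggregate index on tbl(key) and hive.optimize.index.groupby enabled, a count(key) ... GROUP BY key query is answered from the index table default__tbl_tbl_key_idx__ as sum(_count_of_key). The setup is standard Hive indexing DDL along these lines (the literal test statements are assumed, not quoted from this patch):

  CREATE TABLE tbl(key int, value int);
  CREATE INDEX tbl_key_idx ON TABLE tbl(key)
    AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler'
    WITH DEFERRED REBUILD
    IDXPROPERTIES("AGGREGATES"="count(key)");
  ALTER INDEX tbl_key_idx ON tbl REBUILD;
  SET hive.optimize.index.groupby=true;
  EXPLAIN SELECT key, count(key) FROM tbl GROUP BY key;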
mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1687,10 +1687,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1742,10 +1742,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1797,10 +1797,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), value (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1855,10 +1855,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 2 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1913,10 +1913,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 3 (type: int), 2 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1969,16 +1969,20 @@ STAGE PLANS: Filter Operator predicate: (value = key) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: key (type: int), value (type: int) - mode: hash + Select Operator + expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort 
order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -2023,16 +2027,20 @@ STAGE PLANS: Filter Operator predicate: (value = key) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: key (type: int), substr(value, 2, 3) (type: string) - mode: hash + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -2075,11 +2083,11 @@ STAGE PLANS: alias: tbl Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: int), value (type: int) - outputColumnNames: key, value + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), substr(value, 2, 3) (type: string) + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -2134,10 +2142,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 2 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -2153,17 +2161,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: int), 2 (type: int) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2279,18 +2283,22 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) 
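Immediately after this point the same rewrite is exercised against a partitioned base table: the key < 10 predicate is applied both in the base plan and, in the rewritten plan, directly to the scan of the partitioned index table default__tblpart_tbl_part_index__ ahead of the sum(_count_of_key) aggregation, showing that the index rewrite composes with filter pushdown. The driving query is presumably of this form (reconstructed from the plan, not the literal test text):

  EXPLAIN SELECT key, count(key) FROM tblpart WHERE key < 10 GROUP BY key;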
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2373,18 +2381,18 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tblpart_tbl_part_index__ + alias: $hdt$_0:default__tblpart_tbl_part_index__ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_key) - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -2488,11 +2496,11 @@ STAGE PLANS: Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(key) - keys: key (type: int) + aggregations: count(_col0) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -2568,15 +2576,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_key) - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out index 36af99a..12f1abb 100644 --- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -118,29 +118,36 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data 
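Note: the hunks above track two changes in the index-rewrite plans: a Select Operator is now materialized between the Filter and the map-side Group By, and the rewritten scan alias gains a $hdt$_0: prefix. For orientation, a sketch of the kind of query these plans come from (names follow the aliases visible above; the actual DDL lives in the corresponding .q file, and this is an illustration, not the literal test query):

SET hive.optimize.index.groupby=true;
EXPLAIN SELECT key, COUNT(key) FROM tbl WHERE key < 10 GROUP BY key;
-- with the rewrite enabled, the optimizer answers this from the COUNT index
-- table instead, summing its precomputed _count_of_key column, roughly:
-- SELECT key, SUM(_count_of_key) FROM default__tbl_tbl_key_idx__ GROUP BY key;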
diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
index 36af99a..12f1abb 100644
--- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
@@ -118,29 +118,36 @@ STAGE PLANS:
 Map 1
 Map Operator Tree:
 TableScan
-alias: a
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+alias: b
+Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Filter Operator
 isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
-Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-tag: 0
-value expressions: value (type: string)
-auto parallelism: false
+predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: key (type: string), value (type: string), ds (type: string)
+outputColumnNames: _col0, _col1, _col2
+Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
+tag: 0
+value expressions: _col1 (type: string), _col2 (type: string)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
#### A masked pattern was here ####
 Partition
-base file name: src
+base file name: hr=11
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+partition values:
+ds 2008-04-08
+hr 11
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -148,11 +155,13 @@ STAGE PLANS:
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.src
+name default.srcpart
 numFiles 1
 numRows 500
+partition_columns ds/hr
+partition_columns.types string:string
 rawDataSize 5312
-serialization.ddl struct src { string key, string value}
+serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 5812
@@ -162,55 +171,29 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 properties:
-COLUMN_STATS_ACCURATE true
 bucket_count -1
 columns key,value
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.src
-numFiles 1
-numRows 500
-rawDataSize 5312
-serialization.ddl struct src { string key, string value}
+name default.srcpart
+partition_columns ds/hr
+partition_columns.types string:string
+serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-totalSize 5812
#### A masked pattern was here ####
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-name: default.src
-name: default.src
-Truncated Path -> Alias:
-/src [a]
-Map 3
-Map Operator Tree:
-TableScan
-alias: b
-Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-GatherStats: false
-Filter Operator
-isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
-Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
-tag: 1
-value expressions: value (type: string), ds (type: string)
-auto parallelism: false
-Path -> Alias:
-#### A masked pattern was here ####
-Path -> Partition:
+name: default.srcpart
+name: default.srcpart
#### A masked pattern was here ####
 Partition
-base file name: hr=11
+base file name: hr=12
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 partition values:
 ds 2008-04-08
-hr 11
+hr 12
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -251,12 +234,12 @@ STAGE PLANS:
 name: default.srcpart
#### A masked pattern was here ####
 Partition
-base file name: hr=12
+base file name: hr=11
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 partition values:
-ds 2008-04-08
-hr 12
+ds 2008-04-09
+hr 11
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -297,12 +280,12 @@ STAGE PLANS:
 name: default.srcpart
#### A masked pattern was here ####
 Partition
-base file name: hr=11
+base file name: hr=12
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 partition values:
 ds 2008-04-09
-hr 11
+hr 12
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -341,14 +324,41 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.srcpart
 name: default.srcpart
+Truncated Path -> Alias:
+/srcpart/ds=2008-04-08/hr=11 [b]
+/srcpart/ds=2008-04-08/hr=12 [b]
+/srcpart/ds=2008-04-09/hr=11 [b]
+/srcpart/ds=2008-04-09/hr=12 [b]
+Map 3
+Map Operator Tree:
+TableScan
+alias: a
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+GatherStats: false
+Filter Operator
+isSamplingPred: false
+predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+tag: 1
+value expressions: _col1 (type: string)
+auto parallelism: false
+Path -> Alias:
+#### A masked pattern was here ####
+Path -> Partition:
#### A masked pattern was here ####
 Partition
-base file name: hr=12
+base file name: src
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-partition values:
-ds 2008-04-09
-hr 12
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -356,13 +366,11 @@ STAGE PLANS:
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.srcpart
+name default.src
 numFiles 1
 numRows 500
-partition_columns ds/hr
-partition_columns.types string:string
 rawDataSize 5312
-serialization.ddl struct srcpart { string key, string value}
+serialization.ddl struct src { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 5812
@@ -372,48 +380,48 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 properties:
+COLUMN_STATS_ACCURATE true
 bucket_count -1
 columns key,value
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.srcpart
-partition_columns ds/hr
-partition_columns.types string:string
-serialization.ddl struct srcpart { string key, string value}
+name default.src
+numFiles 1
+numRows 500
+rawDataSize 5312
+serialization.ddl struct src { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+totalSize 5812
#### A masked pattern was here ####
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-name: default.srcpart
-name: default.srcpart
+name: default.src
+name: default.src
 Truncated Path -> Alias:
-/srcpart/ds=2008-04-08/hr=11 [b]
-/srcpart/ds=2008-04-08/hr=12 [b]
-/srcpart/ds=2008-04-09/hr=11 [b]
-/srcpart/ds=2008-04-09/hr=12 [b]
+/src [a]
 Reducer 2
 Needs Tagging: true
 Reduce Operator Tree:
 Join Operator
 condition map:
-Right Outer Join0 to 1
+Left Outer Join0 to 1
 filter mappings:
-1 [0, 1]
+0 [1, 1]
 filter predicates:
-0
-1 {(VALUE._col1 = '2008-04-08')}
+0 {(VALUE._col1 = '2008-04-08')}
+1
 keys:
-0 key (type: string)
-1 key (type: string)
-outputColumnNames: _col0, _col1, _col5, _col6
+0 _col0 (type: string)
+1 _col0 (type: string)
+outputColumnNames: _col0, _col1, _col3, _col4
 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 isSamplingPred: false
-predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
+predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean)
 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
@@ -607,16 +615,20 @@ STAGE PLANS:
 GatherStats: false
 Filter Operator
 isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
+predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
 Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-tag: 0
-value expressions: value (type: string)
-auto parallelism: false
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+tag: 0
+value expressions: _col1 (type: string)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -723,16 +735,20 @@ STAGE PLANS:
 GatherStats: false
 Filter Operator
 isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
+predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-tag: 1
-value expressions: value (type: string)
-auto parallelism: false
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+tag: 1
+value expressions: _col1 (type: string)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -789,16 +805,16 @@ STAGE PLANS:
 condition map:
 Right Outer Join0 to 1
 keys:
-0 key (type: string)
-1 key (type: string)
-outputColumnNames: _col0, _col1, _col7, _col8
+0 _col0 (type: string)
+1 _col0 (type: string)
+outputColumnNames: _col0, _col1, _col3, _col4
 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 isSamplingPred: false
-predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
+predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string)
+expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
@@ -983,29 +999,36 @@ STAGE PLANS:
 Map 1
 Map Operator Tree:
 TableScan
-alias: a
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+alias: b
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Filter Operator
 isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
-Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-tag: 0
-value expressions: value (type: string)
-auto parallelism: false
+predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+tag: 0
+value expressions: _col1 (type: string)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
#### A masked pattern was here ####
 Partition
-base file name: src
+base file name: hr=11
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+partition values:
+ds 2008-04-08
+hr 11
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -1013,11 +1036,13 @@ STAGE PLANS:
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.src
+name default.srcpart
 numFiles 1
 numRows 500
+partition_columns ds/hr
+partition_columns.types string:string
 rawDataSize 5312
-serialization.ddl struct src { string key, string value}
+serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 5812
@@ -1027,55 +1052,29 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 properties:
-COLUMN_STATS_ACCURATE true
 bucket_count -1
 columns key,value
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.src
-numFiles 1
-numRows 500
-rawDataSize 5312
-serialization.ddl struct src { string key, string value}
+name default.srcpart
+partition_columns ds/hr
+partition_columns.types string:string
+serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-totalSize 5812
#### A masked pattern was here ####
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-name: default.src
-name: default.src
-Truncated Path -> Alias:
-/src [a]
-Map 3
-Map Operator Tree:
-TableScan
-alias: b
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-GatherStats: false
-Filter Operator
-isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
-Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-tag: 1
-value expressions: value (type: string)
-auto parallelism: false
-Path -> Alias:
-#### A masked pattern was here ####
-Path -> Partition:
+name: default.srcpart
+name: default.srcpart
#### A masked pattern was here ####
 Partition
-base file name: hr=11
+base file name: hr=12
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 partition values:
 ds 2008-04-08
-hr 11
+hr 12
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -1114,14 +1113,39 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.srcpart
 name: default.srcpart
+Truncated Path -> Alias:
+/srcpart/ds=2008-04-08/hr=11 [b]
+/srcpart/ds=2008-04-08/hr=12 [b]
+Map 3
+Map Operator Tree:
+TableScan
+alias: a
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+GatherStats: false
+Filter Operator
+isSamplingPred: false
+predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+tag: 1
+value expressions: _col1 (type: string)
+auto parallelism: false
+Path -> Alias:
+#### A masked pattern was here ####
+Path -> Partition:
#### A masked pattern was here ####
 Partition
-base file name: hr=12
+base file name: src
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-partition values:
-ds 2008-04-08
-hr 12
 properties:
 COLUMN_STATS_ACCURATE true
 bucket_count -1
@@ -1129,13 +1153,11 @@ STAGE PLANS:
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.srcpart
+name default.src
 numFiles 1
 numRows 500
-partition_columns ds/hr
-partition_columns.types string:string
 rawDataSize 5312
-serialization.ddl struct srcpart { string key, string value}
+serialization.ddl struct src { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 5812
@@ -1145,41 +1167,43 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 properties:
+COLUMN_STATS_ACCURATE true
 bucket_count -1
 columns key,value
 columns.comments 'default','default'
 columns.types string:string
#### A masked pattern was here ####
-name default.srcpart
-partition_columns ds/hr
-partition_columns.types string:string
-serialization.ddl struct srcpart { string key, string value}
+name default.src
+numFiles 1
+numRows 500
+rawDataSize 5312
+serialization.ddl struct src { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+totalSize 5812
#### A masked pattern was here ####
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-name: default.srcpart
-name: default.srcpart
+name: default.src
+name: default.src
 Truncated Path -> Alias:
-/srcpart/ds=2008-04-08/hr=11 [b]
-/srcpart/ds=2008-04-08/hr=12 [b]
+/src [a]
 Reducer 2
 Needs Tagging: true
 Reduce Operator Tree:
 Join Operator
 condition map:
-Right Outer Join0 to 1
+Left Outer Join0 to 1
 keys:
-0 key (type: string)
-1 key (type: string)
-outputColumnNames: _col0, _col1, _col5, _col6
+0 _col0 (type: string)
+1 _col0 (type: string)
+outputColumnNames: _col0, _col1, _col3, _col4
 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 isSamplingPred: false
-predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
+predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean)
 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
@@ -1365,20 +1389,24 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: a
-Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Filter Operator
 isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
-Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE
-tag: 0
-value expressions: value (type: string), ds (type: string)
-auto parallelism: false
+predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean)
+Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+tag: 0
+value expressions: _col1 (type: string)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -1474,103 +1502,9 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.srcpart
 name: default.srcpart
-#### A masked pattern was here ####
-Partition
-base file name: hr=11
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-partition values:
-ds 2008-04-09
-hr 11
-properties:
-COLUMN_STATS_ACCURATE true
-bucket_count -1
-columns key,value
-columns.comments 'default','default'
-columns.types string:string
-#### A masked pattern was here ####
-name default.srcpart
-numFiles 1
-numRows 500
-partition_columns ds/hr
-partition_columns.types string:string
-rawDataSize 5312
-serialization.ddl struct srcpart { string key, string value}
-serialization.format 1
-serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-totalSize 5812
-#### A masked pattern was here ####
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-properties:
-bucket_count -1
-columns key,value
-columns.comments 'default','default'
-columns.types string:string
-#### A masked pattern was here ####
-name default.srcpart
-partition_columns ds/hr
-partition_columns.types string:string
-serialization.ddl struct srcpart { string key, string value}
-serialization.format 1
-serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-name: default.srcpart
-name: default.srcpart
-#### A masked pattern was here ####
-Partition
-base file name: hr=12
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-partition values:
-ds 2008-04-09
-hr 12
-properties:
-COLUMN_STATS_ACCURATE true
-bucket_count -1
-columns key,value
-columns.comments 'default','default'
-columns.types string:string
-#### A masked pattern was here ####
-name default.srcpart
-numFiles 1
-numRows 500
-partition_columns ds/hr
-partition_columns.types string:string
-rawDataSize 5312
-serialization.ddl struct srcpart { string key, string value}
-serialization.format 1
-serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-totalSize 5812
-#### A masked pattern was here ####
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-properties:
-bucket_count -1
-columns key,value
-columns.comments 'default','default'
-columns.types string:string
-#### A masked pattern was here ####
-name default.srcpart
-partition_columns ds/hr
-partition_columns.types string:string
-serialization.ddl struct srcpart { string key, string value}
-serialization.format 1
-serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-name: default.srcpart
-name: default.srcpart
 Truncated Path -> Alias:
 /srcpart/ds=2008-04-08/hr=11 [a]
 /srcpart/ds=2008-04-08/hr=12 [a]
-/srcpart/ds=2008-04-09/hr=11 [a]
-/srcpart/ds=2008-04-09/hr=12 [a]
 Map 3
 Map Operator Tree:
 TableScan
@@ -1579,16 +1513,20 @@ STAGE PLANS:
 GatherStats: false
 Filter Operator
 isSamplingPred: false
-predicate: ((key > 15) and (key < 25)) (type: boolean)
-Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-tag: 1
-value expressions: value (type: string)
-auto parallelism: false
+predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
+Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+tag: 1
+value expressions: _col1 (type: string)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -1643,41 +1581,37 @@ STAGE PLANS:
 Reduce Operator Tree:
 Join Operator
 condition map:
-Right Outer Join0 to 1
+Inner Join 0 to 1
 keys:
-0 key (type: string)
-1 key (type: string)
-outputColumnNames: _col0, _col1, _col2, _col7, _col8
-Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE
-Filter Operator
-isSamplingPred: false
-predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean)
-Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-Select Operator
-expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string)
-outputColumnNames: _col0, _col1, _col2, _col3
-Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-File Output Operator
-compressed: false
-GlobalTableId: 0
-#### A masked pattern was here ####
-NumFilesPerFileSink: 1
-Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-table:
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-properties:
-columns _col0,_col1,_col2,_col3
-columns.types string:string:string:string
-escape.delim \
-hive.serialization.extend.additional.nesting.levels true
-serialization.format 1
-serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-TotalFiles: 1
-GatherStats: false
-MultiFileSpray: false
+0 _col0 (type: string)
+1 _col0 (type: string)
+outputColumnNames: _col0, _col1, _col3, _col4
+Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+File Output Operator
+compressed: false
+GlobalTableId: 0
+#### A masked pattern was here ####
+NumFilesPerFileSink: 1
+Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+table:
+input format: org.apache.hadoop.mapred.TextInputFormat
+output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+properties:
+columns _col0,_col1,_col2,_col3
+columns.types string:string:string:string
+escape.delim \
+hive.serialization.extend.additional.nesting.levels true
+serialization.format 1
+serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+TotalFiles: 1
+GatherStats: false
+MultiFileSpray: false
 Stage: Stage-0
 Fetch Operator
@@ -1697,8 +1631,6 @@ PREHOOK: Input: default@src
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
 POSTHOOK: query: FROM srcpart a
@@ -1712,8 +1644,6 @@ POSTHOOK: Input: default@src
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
 17 val_17 17 val_17
 17 val_17 17 val_17
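Note: two things change throughout the router_join_ppr plans above: the outer-join inputs are swapped (Right Outer Join becomes Left Outer Join with the tags and filter mappings reversed, and the unneeded 2008-04-09 partitions are pruned), and numeric comparisons on string columns now print their implicit casts. A minimal illustration of the cast, assuming the standard src(key string, value string) test table:

-- key is a string, so comparing it to a numeric literal converts both sides to double:
SELECT * FROM src WHERE key > 15;    -- planned as (UDFToDouble(key) > 15.0)
SELECT * FROM src WHERE key > '15';  -- stays a plain string comparison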
diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out
index c9c09e8..1f6aac2 100644
--- a/ql/src/test/results/clientpositive/spark/semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out
@@ -2516,14 +2516,18 @@ STAGE PLANS:
 alias: a
 Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
-predicate: (value is not null and (key > 100)) (type: boolean)
-Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: value (type: string)
-sort order: +
-Map-reduce partition columns: value (type: string)
-Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-value expressions: key (type: int)
+predicate: ((key > 100) and value is not null) (type: boolean)
+Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+Select Operator
+expressions: key (type: int), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col1 (type: string)
+sort order: +
+Map-reduce partition columns: _col1 (type: string)
+Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+value expressions: _col0 (type: int)
 Map 3
 Map Operator Tree:
 TableScan
@@ -2534,10 +2538,10 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: value (type: string)
-outputColumnNames: value
+outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
-keys: value (type: string)
+keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
@@ -2552,7 +2556,7 @@ STAGE PLANS:
 condition map:
 Left Semi Join 0 to 1
 keys:
-0 value (type: string)
+0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE
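Note: the semijoin plans keep their shape; the join keys are simply renamed to the internal _colN columns produced by the new Select Operator. The pattern being planned is roughly the following (t1/t2 stand in for the tables created by semijoin.q; the Map 3 hunk is the map-side Group By that deduplicates the right side before the join):

SELECT a.key
FROM t1 a LEFT SEMI JOIN t2 b ON a.value = b.value
WHERE a.key > 100;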
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin.q.out b/ql/src/test/results/clientpositive/spark/skewjoin.q.out
index 5fb2ecb..ec74786 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin.q.out
@@ -105,25 +105,33 @@ STAGE PLANS:
 Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+value expressions: _col1 (type: string)
 Map 3
 Map Operator Tree:
 TableScan
-alias: src2
+alias: src1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-value expressions: value (type: string)
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Reducer 2
 Reduce Operator Tree:
 Join Operator
@@ -131,12 +139,12 @@ STAGE PLANS:
 Inner Join 0 to 1
 handleSkewJoin: true
 keys:
-0 key (type: string)
-1 key (type: string)
-outputColumnNames: _col0, _col6
+0 _col0 (type: string)
+1 _col0 (type: string)
+outputColumnNames: _col1, _col2
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
@@ -178,9 +186,9 @@ STAGE PLANS:
 keys:
 0 reducesinkkey0 (type: string)
 1 reducesinkkey0 (type: string)
-outputColumnNames: _col0, _col6
+outputColumnNames: _col1, _col2
 Select Operator
-expressions: UDFToInteger(_col0) (type: int), _col6 (type: string)
+expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
@@ -218,7 +226,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT sum(hash(key)), sum(hash(value)) FROM dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
@@ -625,14 +633,15 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-expressions: key (type: string)
-outputColumnNames: _col0
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+value expressions: _col1 (type: string)
 Map 4
 Map Operator Tree:
 TableScan
@@ -642,15 +651,14 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
+expressions: key (type: string)
+outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-value expressions: _col1 (type: string)
 Reducer 2
 Reduce Operator Tree:
 Join Operator
@@ -660,19 +668,23 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
-outputColumnNames: _col2, _col3
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-Group By Operator
-aggregations: sum(hash(_col2)), sum(hash(_col3))
-mode: hash
+Select Operator
+expressions: hash(_col0) (type: int), hash(_col1) (type: int)
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+Group By Operator
+aggregations: sum(_col0), sum(_col1)
+mode: hash
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-4
 Conditional Operator
@@ -704,18 +716,22 @@ STAGE PLANS:
 keys:
 0 reducesinkkey0 (type: string)
 1 reducesinkkey0 (type: string)
-outputColumnNames: _col2, _col3
-Group By Operator
-aggregations: sum(hash(_col2)), sum(hash(_col3))
-mode: hash
+outputColumnNames: _col0, _col1
+Select Operator
+expressions: hash(_col0) (type: int), hash(_col1) (type: int)
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+Group By Operator
+aggregations: sum(_col0), sum(_col1)
+mode: hash
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -824,16 +840,16 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
-predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean)
+predicate: (key is not null and (UDFToDouble(substring(value, 5)) + 1.0) is not null) (type: boolean)
 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key (type: string), value (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
-key expressions: _col0 (type: string), (substring(_col1, 5) + 1) (type: double)
+key expressions: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double)
 sort order: ++
-Map-reduce partition columns: _col0 (type: string), (substring(_col1, 5) + 1) (type: double)
+Map-reduce partition columns: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double)
 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string)
 Reducer 2
@@ -844,20 +860,24 @@ STAGE PLANS:
 handleSkewJoin: true
 keys:
 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double)
-1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double)
+1 _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double)
 outputColumnNames: _col2, _col3
 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
-Group By Operator
-aggregations: sum(hash(_col2)), sum(hash(_col3))
-mode: hash
+Select Operator
+expressions: hash(_col2) (type: int), hash(_col3) (type: int)
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+Group By Operator
+aggregations: sum(_col0), sum(_col1)
+mode: hash
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-4
 Conditional Operator
@@ -890,17 +910,21 @@ STAGE PLANS:
 0 reducesinkkey0 (type: string), reducesinkkey1 (type: double)
 1 reducesinkkey0 (type: string), reducesinkkey1 (type: double)
 outputColumnNames: _col2, _col3
-Group By Operator
-aggregations: sum(hash(_col2)), sum(hash(_col3))
-mode: hash
+Select Operator
+expressions: hash(_col2) (type: int), hash(_col3) (type: int)
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+Group By Operator
+aggregations: sum(_col0), sum(_col1)
+mode: hash
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
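Note: these plans all carry handleSkewJoin: true, i.e. they come from runs with the skew-join optimization forced on. skewjoin.q enables it with settings along these lines (the exact values are in the .q file; this is a paraphrase):

SET hive.optimize.skewjoin = true;
SET hive.skewjoin.key = 2;  -- deliberately tiny threshold so the skew path always triggers
-- the sum(hash(...)) aggregations seen in the later hunks checksum the join result:
SELECT sum(hash(a.key)), sum(hash(b.value))
FROM src a JOIN src b ON a.key = b.key;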
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out b/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
index 8afc656..4025885 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
@@ -29,25 +29,33 @@ STAGE PLANS:
 Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-value expressions: value (type: string)
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+value expressions: _col1 (type: string)
 Map 4
 Map Operator Tree:
 TableScan
-alias: b
+alias: a
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Reducer 2
 Reduce Operator Tree:
 Join Operator
@@ -55,8 +63,8 @@ STAGE PLANS:
 Inner Join 0 to 1
 handleSkewJoin: true
 keys:
-0 key (type: string)
-1 key (type: string)
+0 _col0 (type: string)
+1 _col0 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/stats1.q.out b/ql/src/test/results/clientpositive/spark/stats1.q.out
index f00db10..ec2edc4 100644
--- a/ql/src/test/results/clientpositive/spark/stats1.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats1.q.out
@@ -121,7 +121,7 @@ POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 POSTHOOK: Output: default@tmptable
 POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src1)s2.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key, x.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tmptable
diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
index 2840729..cceceef 100644
--- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
@@ -90,10 +90,10 @@ STAGE PLANS:
 Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: a (type: double), b (type: int), c (type: string), d (type: smallint)
-outputColumnNames: a, b, c, d
+outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
-aggregations: count(), count(a), count(b), count(c), count(d)
+aggregations: count(), count(_col0), count(_col1), count(_col2), count(_col3)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
@@ -146,10 +146,10 @@ STAGE PLANS:
 Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: a (type: double), b (type: int), c (type: string), d (type: smallint)
-outputColumnNames: a, b, c, d
+outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
-aggregations: count(), count(a), count(b), count(c), count(d)
+aggregations: count(), count(_col0), count(_col1), count(_col2), count(_col3)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
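Note: only bookkeeping changes in the stats_only_null hunks: the Select Operator now emits internal _colN names and the count() aggregations consume those, so the NULL-skipping semantics are untouched. As a reminder of what is being counted (table name taken from the aliases in stats_only_null.q; schema a double, b int, c string, d smallint as shown above):

-- count(*) counts every row; count(col) skips rows where col is NULL
SELECT count(*), count(a), count(b), count(c), count(d) FROM stats_null;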
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index 854ca14..28eda26 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -43,25 +43,29 @@ STAGE PLANS:
 Filter Operator
 predicate: (value is not null and key is not null) (type: boolean)
 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
-Reduce Output Operator
-key expressions: value (type: string), key (type: string)
-sort order: ++
-Map-reduce partition columns: value (type: string), key (type: string)
+Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+key expressions: _col1 (type: string), _col0 (type: string)
+sort order: ++
+Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
+Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
 Map 3
 Map Operator Tree:
 TableScan
-alias: a
+alias: b
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: ((value > 'val_9') and key is not null) (type: boolean)
 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: value (type: string), key (type: string)
-outputColumnNames: _col1, _col2
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
-keys: _col1 (type: string), _col2 (type: string)
+keys: _col0 (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
@@ -76,7 +80,7 @@ STAGE PLANS:
 condition map:
 Left Semi Join 0 to 1
 keys:
-0 value (type: string), key (type: string)
+0 _col1 (type: string), _col0 (type: string)
 1 _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
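Note: the EXISTS plans are structurally unchanged: a correlated EXISTS of this shape is compiled into a Left Semi Join on the correlated columns, and the hunks above only rename those columns to _colN. The query pattern, paraphrased from subquery_exists.q:

SELECT b.key, b.value
FROM src b
WHERE EXISTS (SELECT a.key FROM src a
              WHERE a.value > 'val_9' AND a.value = b.value AND a.key = b.key);
-- compiled as: src LEFT SEMI JOIN (distinct value/key pairs) ON (value, key)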
predicate: (p_size is not null and p_mfgr is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_size (type: int), p_mfgr (type: string) - sort order: ++ - Map-reduce partition columns: p_size (type: int), p_mfgr (type: string) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan - alias: part + alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) @@ -460,12 +472,12 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 p_size (type: int), p_mfgr (type: string) + 0 _col2 (type: int), _col1 (type: string) 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -617,36 +629,44 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 key (type: string), value (type: string) + 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE @@ -756,9 +776,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -767,33 +787,19 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: l_partkey (type: int) - sort order: + - Map-reduce partition columns: l_partkey (type: int) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - value expressions: l_orderkey (type: int), l_suppkey (type: int) - Map 6 + Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Map 4 Map Operator Tree: TableScan alias: lineitem @@ -815,46 +821,56 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: l_partkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int) - 1 l_partkey (type: int) - outputColumnNames: _col0, _col1, _col3 + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) - Reducer 4 + value expressions: _col2 (type: int) + Reducer 3 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -864,6 +880,18 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out index 92a8595..1bfdba2 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out @@ -237,19 +237,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: 0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Move Operator @@ -580,16 +580,16 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 diff --git a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out index 3660f1a..7576b48 100644 --- a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out @@ -22,7 +22,7 @@ SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_2 +Operator:GBY_3 Table:default@t1 Keys:key @@ -35,7 +35,7 @@ PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_2 +Operator:GBY_3 Table:default@t1 Keys:key,val @@ -77,7 +77,7 @@ SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_2 +Operator:GBY_3 Table:default@t1 Keys:key @@ -90,7 +90,7 @@ PREHOOK: query: SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_2 +Operator:GBY_3 Table:default@t1 Keys:key,val @@ -104,7 +104,7 @@ PREHOOK: query: SELECT key, 1, val, 2, count(1) FROM T1 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_2 +Operator:GBY_3 Table:default@t1 Keys:key,val @@ -130,7 +130,7 @@ group by key + key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_2 +Operator:GBY_3 Table:default@t1 Keys:key @@ -148,11 +148,11 @@ SELECT key, count(1) as c FROM T1 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_2 +Operator:GBY_3 Table:default@t1 Keys:key -Operator:GBY_8 +Operator:GBY_10 Table:default@t1 Keys:key @@ -242,7 +242,7 @@ GROUP BY key, constant3, val PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### 
-Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key,val diff --git a/ql/src/test/results/clientpositive/spark/temp_table.q.out b/ql/src/test/results/clientpositive/spark/temp_table.q.out index 0801abc..65e256d 100644 --- a/ql/src/test/results/clientpositive/spark/temp_table.q.out +++ b/ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -19,7 +19,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 2) = 0) (type: boolean) + predicate: ((UDFToDouble(key) % 2.0) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -85,7 +85,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 2) = 1) (type: boolean) + predicate: ((UDFToDouble(key) % 2.0) = 1.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union.q.out b/ql/src/test/results/clientpositive/spark/union.q.out index 2644473..a78504f 100644 --- a/ql/src/test/results/clientpositive/spark/union.q.out +++ b/ql/src/test/results/clientpositive/spark/union.q.out @@ -35,7 +35,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) + predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -54,7 +54,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union10.q.out b/ql/src/test/results/clientpositive/spark/union10.q.out index 9698dda..5e8fe38 100644 --- a/ql/src/test/results/clientpositive/spark/union10.q.out +++ b/ql/src/test/results/clientpositive/spark/union10.q.out @@ -63,7 +63,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -79,7 +79,7 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: s3 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -194,7 +194,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.key EXPRESSION [] -POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, (src)s3.null, ] +POSTHOOK: Lineage: tmptable.value EXPRESSION [] PREHOOK: query: select * from tmptable x sort by x.key PREHOOK: type: QUERY PREHOOK: Input: default@tmptable diff --git a/ql/src/test/results/clientpositive/spark/union11.q.out 
b/ql/src/test/results/clientpositive/spark/union11.q.out index 300bad4..20c27c7 100644 --- a/ql/src/test/results/clientpositive/spark/union11.q.out +++ b/ql/src/test/results/clientpositive/spark/union11.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -67,7 +67,7 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: s3 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -91,22 +91,18 @@ STAGE PLANS: expressions: 'tst1' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -133,22 +129,18 @@ STAGE PLANS: expressions: 'tst2' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 7 Reduce Operator Tree: Group By Operator @@ -160,22 +152,18 @@ STAGE PLANS: expressions: 'tst3' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 264 
Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/union12.q.out b/ql/src/test/results/clientpositive/spark/union12.q.out index b9ddfc8..cf2c7b7 100644 --- a/ql/src/test/results/clientpositive/spark/union12.q.out +++ b/ql/src/test/results/clientpositive/spark/union12.q.out @@ -198,7 +198,7 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcbucket POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.key EXPRESSION [] -POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.null, (srcbucket)s3.null, ] +POSTHOOK: Lineage: tmptable.value EXPRESSION [] PREHOOK: query: select * from tmptable x sort by x.key PREHOOK: type: QUERY PREHOOK: Input: default@tmptable diff --git a/ql/src/test/results/clientpositive/spark/union13.q.out b/ql/src/test/results/clientpositive/spark/union13.q.out index fca251c..0297988 100644 --- a/ql/src/test/results/clientpositive/spark/union13.q.out +++ b/ql/src/test/results/clientpositive/spark/union13.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union14.q.out b/ql/src/test/results/clientpositive/spark/union14.q.out index 77051df..0c9542b 100644 --- a/ql/src/test/results/clientpositive/spark/union14.q.out +++ b/ql/src/test/results/clientpositive/spark/union14.q.out @@ -37,22 +37,18 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: 
bigint) + value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -95,22 +91,18 @@ STAGE PLANS: expressions: 'tst1' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/union15.q.out b/ql/src/test/results/clientpositive/spark/union15.q.out index c51dd74..6be13c9 100644 --- a/ql/src/test/results/clientpositive/spark/union15.q.out +++ b/ql/src/test/results/clientpositive/spark/union15.q.out @@ -55,47 +55,39 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 5 Map Operator Tree: TableScan - alias: s3 + alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + 
Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -107,22 +99,18 @@ STAGE PLANS: expressions: 'tst1' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union2.q.out b/ql/src/test/results/clientpositive/spark/union2.q.out index 881ce21..e4afb1b 100644 --- a/ql/src/test/results/clientpositive/spark/union2.q.out +++ b/ql/src/test/results/clientpositive/spark/union2.q.out @@ -44,7 +44,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/spark/union20.q.out b/ql/src/test/results/clientpositive/spark/union20.q.out index 546355f..6f0dca6 100644 --- a/ql/src/test/results/clientpositive/spark/union20.q.out +++ b/ql/src/test/results/clientpositive/spark/union20.q.out @@ -58,10 +58,10 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -76,7 +76,7 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: s3 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -92,10 +92,10 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: s4 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: 
(key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union24.q.out b/ql/src/test/results/clientpositive/spark/union24.q.out index 7a05198..658ff9e 100644 --- a/ql/src/test/results/clientpositive/spark/union24.q.out +++ b/ql/src/test/results/clientpositive/spark/union24.q.out @@ -198,7 +198,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), count (type: bigint) @@ -282,7 +282,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), count (type: bigint) @@ -366,7 +366,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), count (type: bigint) @@ -450,22 +450,26 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/union25.q.out b/ql/src/test/results/clientpositive/spark/union25.q.out index db7dfc5..5193c06 100644 --- a/ql/src/test/results/clientpositive/spark/union25.q.out +++ b/ql/src/test/results/clientpositive/spark/union25.q.out @@ -100,43 +100,35 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - 
keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union28.q.out b/ql/src/test/results/clientpositive/spark/union28.q.out index 5160879..98582df 100644 --- a/ql/src/test/results/clientpositive/spark/union28.q.out +++ b/ql/src/test/results/clientpositive/spark/union28.q.out @@ -73,20 +73,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: key (type: string), value (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -94,76 +92,56 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: key (type: string), value (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 
_col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union30.q.out b/ql/src/test/results/clientpositive/spark/union30.q.out index a81417d..3409623 100644 --- a/ql/src/test/results/clientpositive/spark/union30.q.out +++ b/ql/src/test/results/clientpositive/spark/union30.q.out @@ -87,20 +87,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: key (type: string), value (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -108,20 +106,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: key (type: string), value (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -146,59 +142,41 @@ STAGE PLANS: Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic 
stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union33.q.out b/ql/src/test/results/clientpositive/spark/union33.q.out index 680a627..0e6b1aa 100644 --- a/ql/src/test/results/clientpositive/spark/union33.q.out +++ b/ql/src/test/results/clientpositive/spark/union33.q.out @@ -51,7 +51,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 0) (type: boolean) + predicate: (UDFToDouble(key) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '0' (type: string), value (type: string) @@ -72,11 +72,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: 
_col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -205,11 +205,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -225,7 +225,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 0) (type: boolean) + predicate: (UDFToDouble(key) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '0' (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union4.q.out b/ql/src/test/results/clientpositive/spark/union4.q.out index cdda727..c121ef0 100644 --- a/ql/src/test/results/clientpositive/spark/union4.q.out +++ b/ql/src/test/results/clientpositive/spark/union4.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -148,7 +148,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.key EXPRESSION [] -POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, ] +POSTHOOK: Lineage: tmptable.value EXPRESSION [] PREHOOK: query: select * from tmptable x sort by x.key PREHOOK: type: QUERY PREHOOK: Input: default@tmptable diff --git a/ql/src/test/results/clientpositive/spark/union5.q.out b/ql/src/test/results/clientpositive/spark/union5.q.out index a65c4e8..afee988 100644 --- a/ql/src/test/results/clientpositive/spark/union5.q.out +++ b/ql/src/test/results/clientpositive/spark/union5.q.out @@ -46,7 +46,7 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -70,22 +70,18 @@ STAGE PLANS: expressions: 'tst1' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 
96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -112,22 +108,18 @@ STAGE PLANS: expressions: 'tst2' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/union6.q.out b/ql/src/test/results/clientpositive/spark/union6.q.out index 8146e12..6176beb 100644 --- a/ql/src/test/results/clientpositive/spark/union6.q.out +++ b/ql/src/test/results/clientpositive/spark/union6.q.out @@ -121,7 +121,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tmptable.value EXPRESSION [(src1)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from tmptable x sort by x.key, x.value PREHOOK: type: QUERY PREHOOK: Input: default@tmptable diff --git a/ql/src/test/results/clientpositive/spark/union7.q.out b/ql/src/test/results/clientpositive/spark/union7.q.out index 7427959..4a81283 100644 --- a/ql/src/test/results/clientpositive/spark/union7.q.out +++ b/ql/src/test/results/clientpositive/spark/union7.q.out @@ -51,22 +51,18 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) 
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -78,22 +74,18 @@ STAGE PLANS: expressions: 'tst1' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union8.q.out b/ql/src/test/results/clientpositive/spark/union8.q.out index d7211ed..03545f5 100644 --- a/ql/src/test/results/clientpositive/spark/union8.q.out +++ b/ql/src/test/results/clientpositive/spark/union8.q.out @@ -42,7 +42,7 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -58,7 +58,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: s3 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union9.q.out b/ql/src/test/results/clientpositive/spark/union9.q.out index 804ca1e..d420ef1 100644 --- a/ql/src/test/results/clientpositive/spark/union9.q.out +++ b/ql/src/test/results/clientpositive/spark/union9.q.out @@ -46,7 +46,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -64,7 +64,7 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: s3 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out index bf0fc20..ba0e293 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out +++ 
b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out @@ -79,11 +79,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -100,11 +100,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out index a9d4b51..2718775 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out @@ -98,7 +98,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) + expressions: key (type: string), 1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -116,11 +116,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -136,7 +136,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) + expressions: key (type: string), 2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out index 58abd21..26cfbab 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out @@ -85,11 +85,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -106,11 +106,11 @@ STAGE PLANS: Statistics: 
Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out index 356e79a..7a7aaf2 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out @@ -88,11 +88,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -109,11 +109,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out index 2c01a5c..a5e15c5 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out @@ -83,11 +83,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), ds (type: string) - outputColumnNames: key, ds + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), ds (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -104,11 +104,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), ds (type: string) - outputColumnNames: key, ds + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), ds (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out index d1e2312..ad44400 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out +++ 
b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out @@ -83,11 +83,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -104,11 +104,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -273,49 +273,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inputtbl1 - Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 7) (type: boolean) - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) = 7.0) (type: boolean) + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '7' (type: string) - outputColumnNames: key - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan alias: inputtbl1 - Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 7) (type: boolean) - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) = 7.0) (type: boolean) + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '7' (type: string) - outputColumnNames: key - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE 
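In the union_remove_19 hunks above, the old plan's bare predicate (key = 7) is now printed with Hive's implicit coercion made explicit, (UDFToDouble(key) = 7.0), and the constant-folded key '7' flows into the Group By without the redundant Select wrappers. A minimal sketch of the comparison semantics at work (inputtbl1 and its string key column are taken from the plan; the CAST spelling is an assumed equivalent of the UDFToDouble the plan prints):

  -- comparing a string column with a numeric literal coerces the
  -- column to double, which is exactly what the new plan spells out
  SELECT key, COUNT(1)
  FROM inputtbl1
  WHERE CAST(key AS DOUBLE) = 7.0   -- written as key = 7 in the query
  GROUP BY key;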
Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -324,23 +324,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '7' (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 4 Reduce Operator Tree: Group By Operator @@ -348,23 +340,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '7' (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -451,43 +435,49 @@ STAGE PLANS: TableScan alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Filter Operator + predicate: ((UDFToDouble(key) + UDFToDouble(key)) >= 7.0) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: 
key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Filter Operator + predicate: ((UDFToDouble(key) + UDFToDouble(key)) >= 7.0) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -497,20 +487,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (_col0 + _col0) (type: double), _col1 (type: bigint) + expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 >= 7.0) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 4 Reduce Operator Tree: Group By Operator @@ -520,20 +507,17 @@ 
STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (_col0 + _col0) (type: double), _col1 (type: bigint) + expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 >= 7.0) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out index 59d88cb..26c4eff 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out @@ -84,11 +84,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -104,7 +104,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) + expressions: key (type: string), 1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -121,7 +121,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) + expressions: key (type: string), 2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out index f57353f..1d67177 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out @@ -81,11 +81,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: 
string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -102,11 +102,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out index 48867fb..9f5b070 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out @@ -81,20 +81,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -102,68 +100,48 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File 
Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_22.q.out b/ql/src/test/results/clientpositive/spark/union_remove_22.q.out index c41e12f..2e01432 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_22.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_22.q.out @@ -284,11 +284,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -305,11 +305,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out index e989ed2..2659798 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out @@ -77,11 +77,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num 
rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -98,11 +98,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out index a5a0126..0a94684 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out @@ -95,11 +95,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -116,11 +116,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out index 7d94d1d..6c3d596 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out @@ -84,11 +84,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -105,11 +105,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out 
b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out index b31b3e6..55baa8b 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out @@ -91,11 +91,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -111,7 +111,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) + expressions: key (type: string), 1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -128,7 +128,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) + expressions: key (type: string), 2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out index 988973f..c981ae4 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out @@ -432,11 +432,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -453,11 +453,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out index 2a5eccf..084fbd6 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out @@ -83,11 +83,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: 
string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -104,11 +104,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out index 7aee2af..f580bd8 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out @@ -88,11 +88,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -108,7 +108,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) + expressions: key (type: string), 1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -125,7 +125,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) + expressions: key (type: string), 2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out index 87ceca2..0931d11 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out @@ -95,11 +95,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/union_top_level.q.out b/ql/src/test/results/clientpositive/spark/union_top_level.q.out index 5136989..dede1ef 100644 --- a/ql/src/test/results/clientpositive/spark/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/spark/union_top_level.q.out @@ -37,7 +37,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key 
% 3) = 0) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 0 (type: int) @@ -56,7 +56,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 1) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 1 (type: int) @@ -75,7 +75,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 2) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 2 (type: int) @@ -414,7 +414,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 0) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 0 (type: int) @@ -433,7 +433,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 1) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 1 (type: int) @@ -452,7 +452,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 2) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 2 (type: int) @@ -616,7 +616,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 0) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 0 (type: int) @@ -635,7 +635,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 1) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 1 (type: int) @@ -654,7 +654,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 2) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 2 (type: int) @@ -805,7 +805,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: 
((key % 3) = 0) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 0 (type: int) @@ -824,7 +824,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 1) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 1.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 1 (type: int) @@ -843,7 +843,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key % 3) = 2) (type: boolean) + predicate: ((UDFToDouble(key) % 3.0) = 2.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 2 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out index 44ecd09..69f4754 100644 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out +++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out @@ -133,11 +133,11 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int) - outputColumnNames: i + outputColumnNames: _col0 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(50), avg(UDFToDouble(50)), avg(CAST( 50 AS decimal(10,0))) - keys: i (type: int) + aggregations: avg(50), avg(50.0), avg(50) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index cb2d56b..7e161d1 100644 --- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -54,11 +54,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - outputColumnNames: cint, cdecimal1, cdecimal2 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() - keys: cint (type: int) + aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), count(_col2), max(_col2), min(_col2), sum(_col2), count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE @@ -159,11 +159,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) 
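The vector_decimal_aggregate hunks around here switch the Group By inputs from raw column names (cint, cdecimal1, cdecimal2) to the Select Operator's internal aliases (_col0, _col1, _col2) without changing the aggregation itself. A minimal sketch of that aggregation, lifted from the plan's aggregation list (the source table is not named in this excerpt, so decimal_src below is a stand-in):

  -- count/max/min/sum over two decimal columns plus a bare count(),
  -- grouped by the int key, matching the plan's aggregation list
  SELECT cint,
         COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
         COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2),
         COUNT(*)
  FROM decimal_src   -- stand-in name; the real table is not shown here
  GROUP BY cint;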
- outputColumnNames: cint, cdecimal1, cdecimal2 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() - keys: cint (type: int) + aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), avg(_col1), stddev_pop(_col1), stddev_samp(_col1), count(_col2), max(_col2), min(_col2), sum(_col2), avg(_col2), stddev_pop(_col2), stddev_samp(_col2), count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index 23f2b98..322270f 100644 --- a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -129,10 +129,10 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), t (type: tinyint) - outputColumnNames: s, t + outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: s (type: string), t (type: tinyint) + keys: _col0 (type: string), _col1 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/vector_elt.q.out b/ql/src/test/results/clientpositive/spark/vector_elt.q.out index 818e266..49d1458 100644 --- a/ql/src/test/results/clientpositive/spark/vector_elt.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_elt.q.out @@ -21,7 +21,7 @@ STAGE PLANS: predicate: (ctinyint > 0) (type: boolean) Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ((ctinyint % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((ctinyint % 2) + 1), cstring1, cint) (type: string) + expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index 6f62820..3d6a236 100644 --- a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -129,11 +129,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), t (type: tinyint), b (type: bigint) - outputColumnNames: s, t, b + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(b) - keys: s (type: string), t 
(type: tinyint) + aggregations: max(_col2) + keys: _col0 (type: string), _col1 (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 02c1fc6..8cf1a81 100644 --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -30,23 +30,11 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 l_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan alias: lineitem @@ -65,18 +53,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: lineitem @@ -84,17 +65,21 @@ STAGE PLANS: Filter Operator predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash + Select Operator + expressions: l_partkey (type: int) outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -103,38 +88,60 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + Spark HashTable Sink Operator keys: - 0 _col0 (type: int) - 1 l_partkey (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan 
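The vector_mapjoin_reduce rework below restages the same semijoin pipeline: the distinct-l_partkey build side gets its own stage (Map 3 feeding Reducer 4, which now ends in a Spark HashTable Sink), and the probe side in Map 1 applies the Left Semi Join on l_orderkey before the Inner Join on l_partkey. The WHERE clause being planned is quoted verbatim in a hunk header further down; a sketch of the full query around it (the derived table p is reconstructed from the distinct aggregation in Map 3/Reducer 4, while the projected columns follow the plan's final Select of _col1/_col2):

  -- lineitem rows with l_linenumber = 1 whose l_orderkey appears
  -- among 'AIR' shipments on the same line number (correlated IN),
  -- joined against the distinct set of part keys
  SELECT li.l_partkey, li.l_suppkey
  FROM (SELECT DISTINCT l_partkey AS p_partkey FROM lineitem) p
  JOIN lineitem li ON p.p_partkey = li.l_partkey
  WHERE li.l_linenumber = 1
    AND li.l_orderkey IN (SELECT l_orderkey
                          FROM lineitem
                          WHERE l_shipmode = 'AIR'
                            AND l_linenumber = li.l_linenumber);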
+                  alias: lineitem
+                  Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
+                    Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: _col0 (type: int), _col3 (type: int)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
+                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Left Semi Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1, _col2
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          keys:
+                            0 _col1 (type: int)
+                            1 _col0 (type: int)
+                          outputColumnNames: _col1, _col2
+                          input vertices:
+                            1 Reducer 4
+                          Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col1 (type: int), _col2 (type: int)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+                            File Output Operator
+                              compressed: false
+                              Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+                              table:
+                                  input format: org.apache.hadoop.mapred.TextInputFormat
+                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
 
   Stage: Stage-0
     Fetch Operator
@@ -181,60 +188,80 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
+  Stage-3 is a root stage
+  Stage-2 depends on stages: Stage-3
   Stage-1 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-3
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: li
-                  Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (((l_partkey is not null and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean)
-                    Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: int)
-                        1 l_partkey (type: int)
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean)
-                    Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
+                    Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int), 1 (type: int)
+                      expressions: l_orderkey (type: int), l_linenumber (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         keys: _col0 (type: int), _col1 (type: int)
                         mode: hash
                         outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                         Spark HashTable Sink Operator
                           keys:
-                            0 _col1 (type: int), _col4 (type: int)
+                            0 _col0 (type: int), _col3 (type: int)
                             1 _col0 (type: int), _col1 (type: int)
             Local Work:
              Map Reduce Local Work
 
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: lineitem
+                  Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
+                    Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Left Semi Join 0 to 1
+                        keys:
+                          0 _col0 (type: int), _col3 (type: int)
+                          1 _col0 (type: int), _col1 (type: int)
+                        outputColumnNames: _col1, _col2
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 14 Data size: 1714 Basic stats: COMPLETE Column stats: NONE
+                        Spark HashTable Sink Operator
+                          keys:
+                            0 _col1 (type: int)
+                            1 _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 4 <- Map 3 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 3 
            Map Operator Tree:
                TableScan
                  alias: lineitem
@@ -242,17 +269,21 @@ STAGE PLANS:
                   Filter Operator
                     predicate: l_partkey is not null (type: boolean)
                     Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: l_partkey (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: l_partkey (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+        Reducer 4 
            Local Work:
              Map Reduce Local Work
            Reduce Operator Tree:
@@ -265,33 +296,23 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 l_partkey (type: int)
-                outputColumnNames: _col0, _col1, _col3, _col4
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2
                 input vertices:
-                  1 Map 3
+                  0 Map 1
                 Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Left Semi Join 0 to 1
-                  keys:
-                    0 _col1 (type: int), _col4 (type: int)
-                    1 _col0 (type: int), _col1 (type: int)
-                  outputColumnNames: _col0, _col3
-                  input vertices:
-                    1 Map 4
-                  Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col3 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                Select Operator
+                  expressions: _col1 (type: int), _col2 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized
 
   Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
index 3363c8b..6bbb6b9 100644
--- a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
@@ -126,11 +126,11 @@ STAGE PLANS:
                   Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: bo (type: boolean), b (type: bigint)
-                    outputColumnNames: bo, b
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: max(b)
-                      keys: bo (type: boolean)
+                      aggregations: max(_col1)
+                      keys: _col0 (type: boolean)
                       mode: hash
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
index 9ec8538..b78aec9 100644
--- a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
@@ -300,11 +300,11 @@ STAGE PLANS:
                   alias: vectortab2korc
                   Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: dt (type: date)
-                    outputColumnNames: dt
+                    expressions: concat(concat(concat('Quarter ', UDFToString(UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0) + 1.0)))), '-'), UDFToString(year(dt))) (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      keys: concat(concat(concat('Quarter ', UDFToString(UDFToInteger((((month(dt) - 1) / 3) + 1)))), '-'), UDFToString(year(dt))) (type: string)
+                      keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index f61ef7d..b2dd09d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -129,10 +129,10 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint)
-                    outputColumnNames: ctinyint
+                    outputColumnNames: _col0
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(ctinyint)
+                      aggregations: sum(_col0)
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -435,10 +435,10 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cbigint (type: bigint)
-                    outputColumnNames: cbigint
+                    outputColumnNames: _col0
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(cbigint)
+                      aggregations: sum(_col0)
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -741,10 +741,10 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cfloat (type: float)
-                    outputColumnNames: cfloat
+                    outputColumnNames: _col0
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(cfloat)
+                      aggregations: sum(_col0)
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -990,14 +990,14 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((cstring2 like '%b%') or ((79.553 <> cint) or (cbigint < cdouble))) (type: boolean)
+                    predicate: ((cstring2 like '%b%') or ((79.553 <> UDFToDouble(cint)) or (UDFToDouble(cbigint) < cdouble))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint)
-                      outputColumnNames: cbigint, cfloat, ctinyint
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
+                        aggregations: avg(_col0), stddev_pop(_col0), var_samp(_col0), count(), sum(_col1), min(_col2)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                         Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
@@ -1014,7 +1014,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: double), (- _col0) (type: double), (-6432 + _col0) (type: double), _col1 (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) + (-6432 + _col0)) (type: double), _col2 (type: double), (- (-6432 + _col0)) (type: double), (-6432 + (- (-6432 + _col0))) (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) / (- (-6432 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
+                  expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
                   Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
index 41ee583..be00d16 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
@@ -82,15 +82,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((ctimestamp1 > 11) and ((ctimestamp2 <> 12) and (ctinyint < 9763215.5639)))) (type: boolean)
+                    predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((UDFToDouble(ctimestamp1) > 11.0) and ((UDFToDouble(ctimestamp2) <> 12.0) and (UDFToDouble(ctinyint) < 9763215.5639)))) (type: boolean)
                     Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
-                      outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
-                        keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
+                        aggregations: max(_col1), sum(_col3), stddev_pop(_col3), stddev_pop(_col1), max(_col3), min(_col1)
+                        keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                         Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
@@ -110,7 +110,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                 Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * _col3) (type: double), _col7 (type: double), _col8 (type: double), (((- _col1) + _col5) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * ((- _col1) + _col5)) / _col1) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * ((- _col1) + _col5)) (type: double)
+                  expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * UDFToDouble(_col3)) (type: double), _col7 (type: double), _col8 (type: double), (UDFToDouble(((- _col1) + _col5)) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double)
                   outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                   Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
@@ -336,15 +336,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((ctimestamp1 > -1.388) and ((ctimestamp2 <> -1.3359999999999999) and (ctinyint < 9763215.5639)))) (type: boolean)
+                    predicate: (((cfloat < 3569.0) and ((10.175 >= cdouble) and (cboolean1 <> 1))) or ((UDFToDouble(ctimestamp1) > -1.388) and ((UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (UDFToDouble(ctinyint) < 9763215.5639)))) (type: boolean)
                     Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
-                      outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
-                        keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
+                        aggregations: max(_col1), sum(_col3), stddev_pop(_col3), stddev_pop(_col1), max(_col3), min(_col1)
+                        keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                         Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
@@ -364,7 +364,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                 Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * _col3) (type: double), _col7 (type: double), _col8 (type: double), (((- _col1) + _col5) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * ((- _col1) + _col5)) / _col1) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * ((- _col1) + _col5)) (type: double)
+                  expressions: _col0 (type: boolean), _col1 (type: tinyint), (- _col6) (type: double), (79.553 * UDFToDouble(_col3)) (type: double), _col7 (type: double), _col8 (type: double), (UDFToDouble(((- _col1) + _col5)) - 10.175) (type: double), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col2 (type: timestamp), _col10 (type: tinyint), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double)
                   outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                   Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
index 1de639b..cbfa70e 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
@@ -82,15 +82,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((ctinyint <= cbigint) and ((cint <= cdouble) or (ctimestamp2 < ctimestamp1))) and (cdouble < ctinyint)) and ((cbigint > -257) or (cfloat < cint))) (type: boolean)
+                    predicate: ((((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) and (cdouble < UDFToDouble(ctinyint))) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean)
                     Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double)
-                      outputColumnNames: ctimestamp1, cfloat, cstring1, cboolean1, cdouble
+                      expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                       Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: stddev_samp((- ((- 26.28) + cdouble))), max(cfloat), stddev_pop(cfloat), count(cfloat), var_pop(cfloat), var_samp(cfloat)
-                        keys: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double)
+                        aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1)
+                        keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                         Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
@@ -110,7 +110,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                 Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (_col1 - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (_col1 - _col4)) (type: double)
+                  expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                   Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
index 960a058..a58d941 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
@@ -78,15 +78,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((cstring2 like '%ss%') or (cstring1 like '10%')) or ((cint >= -75) and ((ctinyint = csmallint) and (cdouble >= -3728)))) (type: boolean)
+                    predicate: ((cstring2 like '%ss%') or ((cstring1 like '10%') or ((cint >= -75) and ((UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
-                      outputColumnNames: cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint)
-                        keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
+                        aggregations: stddev_samp(_col0), min(_col2), stddev_samp(_col4), var_pop(_col4), var_samp(_col5), stddev_pop(_col5)
+                        keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
                         Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -106,7 +106,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
                 Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - _col5) (type: double), _col8 (type: double), (_col2 * 79.553) (type: double), (33 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (_col5 - _col0) (type: float), (-23 % _col4) (type: int), (- (-26.28 - _col5)) (type: double), _col12 (type: double)
+                  expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - UDFToDouble(_col5)) (type: double), _col8 (type: double), (_col2 * 79.553) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - UDFToDouble(_col5))) (type: double), _col12 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
                   Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index 473eaf4..a42c30a 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -63,11 +63,11 @@ STAGE PLANS:
                   Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
-                    outputColumnNames: cstring1, cdouble, ctimestamp1
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
-                      keys: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
+                      aggregations: count(_col1), stddev_samp(_col1), min(_col1)
+                      keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
@@ -87,7 +87,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * _col3) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (_col3 / -1.389) (type: double), _col4 (type: double)
+                  expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (UDFToDouble(_col3) / -1.389) (type: double), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
                   Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index 17b35fe..8bbeb51 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -59,11 +59,11 @@ STAGE PLANS:
                   Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
-                    outputColumnNames: cstring1, cdouble, ctimestamp1
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
-                      keys: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp)
+                      aggregations: count(_col1), stddev_samp(_col1), min(_col1)
+                      keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
@@ -83,7 +83,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * _col3) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (_col3 / -1.389) (type: double), _col4 (type: double)
+                  expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639) (type: double), (- (_col1 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col1) (type: double), (UDFToDouble(_col3) / -1.389) (type: double), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
                   Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
index 30d116f..5887839 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
@@ -168,7 +168,7 @@ STAGE PLANS:
                     predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean)
                     Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: (cbigint - 988888) (type: bigint), (cdouble / (cbigint - 988888)) (type: double), (1.2 / (cbigint - 988888)) (type: double)
+                      expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / UDFToDouble((cbigint - 988888))) (type: double)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -340,10 +340,10 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((cdouble >= -500) and (cdouble < -199)) (type: boolean)
+                    predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean)
                     Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: (cdouble + 200.0) (type: double), (cbigint / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
+                      expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col4, _col5
                       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out b/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
index f92e823..f0852ea 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
@@ -67,7 +67,7 @@ STAGE PLANS:
                   alias: alltypesorc_part
                   Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: (cdouble + 2) (type: double)
+                    expressions: (cdouble + 2.0) (type: double)
                     outputColumnNames: _col0
                     Statistics: Num rows: 200 Data size: 54496 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
index 6e784cd..eb8914b 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
@@ -18,17 +18,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  filterExpr: (cbigint < cdouble) (type: boolean)
+                  filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (cbigint < cdouble) (type: boolean)
+                    predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
                     Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cbigint (type: bigint)
-                      outputColumnNames: cbigint
+                      outputColumnNames: _col0
                       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: avg(cbigint)
+                        aggregations: avg(_col0)
                         mode: hash
                         outputColumnNames: _col0
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index c2035f2..657f1b5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -148,14 +148,14 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((762 = cbigint) or ((csmallint < cfloat) and ((ctimestamp2 > -5) and (cdouble <> cint)))) or (cstring1 = 'a')) or ((cbigint <= -1.389) and ((cstring2 <> 'a') and ((79.553 <> cint) and (cboolean2 <> cboolean1))))) (type: boolean)
+                    predicate: ((762 = cbigint) or (((UDFToFloat(csmallint) < cfloat) and ((UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint)))) or ((cstring1 = 'a') or ((UDFToDouble(cbigint) <= -1.389) and ((cstring2 <> 'a') and ((79.553 <> UDFToDouble(cint)) and (cboolean2 <> cboolean1))))))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
-                      outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
+                        aggregations: avg(_col0), sum(_col1), stddev_pop(_col0), stddev_samp(_col2), var_samp(_col0), avg(_col3), stddev_samp(_col0), min(_col4), count(_col2)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                         Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
@@ -172,7 +172,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                 Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: double), (_col0 + -3728) (type: double), (- (_col0 + -3728)) (type: double), (- (- (_col0 + -3728))) (type: double), ((- (- (_col0 + -3728))) * (_col0 + -3728)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728))) * (_col0 + -3728)) * (- (- (_col0 + -3728)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728)))) (type: double), ((_col2 - (- (- (_col0 + -3728)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563) (type: double), _col6 (type: double), (- ((- _col2) / -563)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (_col7 / ((- _col2) / -563)) (type: double), (- (_col0 / _col1)) (type: double)
+                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
                   Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -360,14 +360,14 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((((cbigint <= 197) and (cint < cbigint)) or ((cdouble >= -26.28) and (csmallint > cdouble))) or ((ctinyint > cfloat) and cstring1 regexp '.*ss.*')) or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean)
+                    predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or (((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or (((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))))) (type: boolean)
                     Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint)
-                      outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint)
+                        aggregations: max(_col0), var_pop(_col1), stddev_pop(_col2), max(_col3), avg(_col4), min(_col0), min(_col3), stddev_samp(_col2), var_samp(_col0)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                         Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -384,7 +384,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), (_col0 / -3728) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), ((_col0 * -3728) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (_col0 / -3728)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((_col0 / -3728) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
+                  expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
                   Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -563,14 +563,14 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((((ctimestamp1 = ctimestamp2) or (762.0 = cfloat)) or (cstring1 = 'ss')) or ((csmallint <= cbigint) and (1 = cboolean2))) or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a')))) (type: boolean)
+                    predicate: ((ctimestamp1 = ctimestamp2) or ((762.0 = cfloat) or ((cstring1 = 'ss') or (((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a'))))))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double)
-                      outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint)
+                        aggregations: var_pop(_col0), count(), max(_col1), stddev_pop(_col2), max(_col3), stddev_samp(_col4), count(_col1), avg(_col1)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                         Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -587,7 +587,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                 Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (_col1 % 79.553) (type: double), _col2 (type: tinyint), (_col1 - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (_col2 + (762 * (- _col1))) (type: bigint), ((- _col0) + _col4) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (_col2 + (762 * (- _col1)))) (type: bigint)
+                  expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (UDFToDouble(_col1) % 79.553) (type: double), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                   Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -745,14 +745,14 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((ctimestamp2 <= ctimestamp1) and ((cbigint <> cdouble) and ('ss' <= cstring1))) or ((csmallint < ctinyint) and (ctimestamp1 >= 0))) or (cfloat = 17.0)) (type: boolean)
+                    predicate: (((ctimestamp2 <= ctimestamp1) and ((UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1))) or (((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17.0))) (type: boolean)
                     Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float)
-                      outputColumnNames: ctinyint, cbigint, cint, cfloat
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
+                        aggregations: avg(_col0), max(_col1), stddev_samp(_col2), var_pop(_col2), var_pop(_col1), max(_col3)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                         Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
@@ -769,7 +769,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: double), (_col0 + 6981) (type: double), ((_col0 + 6981) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981) + _col0) / _col0) (type: double), (- (_col0 + 6981)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), ((- _col1) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double)
+                  expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
                   Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -935,10 +935,10 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and ((csmallint < 79.553) and (-257 <> ctinyint)))) or ((cdouble > ctinyint) and (cfloat >= cint))) or ((cint < cbigint) and (ctinyint > cbigint))) (type: boolean)
+                    predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or (((1 <> cboolean2) and ((UDFToDouble(csmallint) < 79.553) and (-257 <> UDFToInteger(ctinyint)))) or (((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))))) (type: boolean)
                     Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - cint) (type: double), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (cint / cint) (type: double), ((-863.257 - cint) - -26.28) (type: double), (- cfloat) (type: float), (cdouble * -89010) (type: double), (ctinyint / 988888) (type: double), (- ctinyint) (type: tinyint), (79.553 / ctinyint) (type: double)
+                      expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - UDFToDouble(cint)) (type: double), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - UDFToDouble(cint)) - -26.28) (type: double), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / UDFToDouble(ctinyint)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
                       Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -1194,10 +1194,10 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((((197.0 > ctinyint) and (cint = cbigint)) or (cbigint = 359)) or (cboolean1 < 0)) or ((cstring1 like '%ss') and (cfloat <= ctinyint))) (type: boolean)
+                    predicate: (((197.0 > UDFToDouble(ctinyint)) and (UDFToLong(cint) = cbigint)) or ((cbigint = 359) or ((cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: cint (type: int), cbigint (type: bigint), (cint / cbigint) (type: double), (cbigint % 79.553) (type: double), (- (cint / cbigint)) (type: double), (10.175 % cfloat) (type: double), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432) (type: float), (cdouble * csmallint) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), cstring1 (type: string), (cfloat - (cint / cbigint)) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359 - cdouble) (type: double), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean)
+                      expressions: cint (type: int), cbigint (type: bigint), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (UDFToDouble(cbigint) % 79.553) (type: double), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), cstring1 (type: string), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -1402,10 +1402,10 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((((csmallint > -26.28) and (cstring2 like 'ss')) or ((cdouble <= cbigint) and ((cstring1 >= 'ss') and (cint <> cdouble)))) or (ctinyint = -89010)) or ((cbigint <= cfloat) and (-26.28 <= csmallint))) (type: boolean)
+                    predicate: (((UDFToDouble(csmallint) > -26.28) and (cstring2 like 'ss')) or (((cdouble <= UDFToDouble(cbigint)) and ((cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble))) or ((UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= UDFToDouble(csmallint)))))) (type: boolean)
                     Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + csmallint) (type: int), (cbigint - ctinyint) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - ctinyint) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), ((cint + csmallint) * (- cbigint)) (type: bigint), ((- cdouble) + cbigint) (type: double), (-1.389 / ctinyint) (type: double), (cbigint % cdouble) (type: double), (- csmallint) (type: smallint), (csmallint + (cint + csmallint)) (type: int)
+                      expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / UDFToDouble(ctinyint)) (type: double), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                       Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -1668,10 +1668,10 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((((-1.389 >= cint) and ((csmallint < ctinyint) and (-6432 > csmallint))) or ((cdouble >= cfloat) and (cstring2 <= 'a'))) or ((cstring1 like 'ss%') and (10.175 > cbigint))) (type: boolean)
+                    predicate: (((-1.389 >= UDFToDouble(cint)) and ((csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint)))) or (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > UDFToDouble(cbigint))))) (type: boolean)
                     Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), (cdouble * 10.175) (type: double), ((-6432 * cfloat) / cfloat) (type: double), (- cfloat) (type: float), (cint % csmallint) (type: int), (cdouble * (- cdouble)) (type: double), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (cbigint / 3569) (type: double), (-257 - csmallint) (type: int), (-6432 * cfloat) (type: float), (- cdouble) (type: double)
+                      expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (cdouble * (- cdouble)) (type: double), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double)
                       outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col15, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                       Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -1876,15 +1876,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((csmallint >= -257) and ((-6432 = csmallint) or ((cint >= cdouble) and (ctinyint <= cint)))) (type: boolean)
+                    predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean)
                     Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint)
-                      outputColumnNames: csmallint, cbigint, ctinyint
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count()
-                        keys: csmallint (type: smallint)
+                        aggregations: stddev_samp(_col0), sum(_col1), var_pop(_col2), count()
+                        keys: _col0 (type: smallint)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
                         Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
@@ -1904,7 +1904,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: smallint), (_col0 % -75) (type: int), _col1 (type: double), (-1.389 / _col0) (type: double), _col2 (type: bigint), ((_col0 % -75) / _col2) (type: double), (- (_col0 % -75)) (type: int), _col3 (type: double), (- (- (_col0 % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
+                  expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / UDFToDouble(_col0)) (type: double), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                   Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
@@ -2084,15 +2084,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((cdouble > 2563.58) and (((cbigint >= cint) and ((csmallint < cint) and (cfloat < -5638.15))) or ((cdouble <= cbigint) and (-5638.15 > cbigint)))) (type: boolean)
+                    predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and ((UDFToInteger(csmallint) < cint) and (UDFToDouble(cfloat) < -5638.15))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > UDFToDouble(cbigint))))) (type: boolean)
                     Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), cfloat (type: float)
-                      outputColumnNames: cdouble, cfloat
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble)
-                        keys: cdouble (type: double)
+                        aggregations: var_samp(_col0), count(_col1), sum(_col1), var_pop(_col0), stddev_pop(_col0), sum(_col0)
+                        keys: _col0 (type: double)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                         Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
@@ -2112,7 +2112,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                 Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double)
+                  expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double)
                   outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                   Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
@@ -2339,15 +2339,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((ctimestamp1 <> 0) and (((((((-257 <> ctinyint) and cboolean2 is not null) and (cstring1 regexp '.*ss' and (-3 < ctimestamp1))) or (ctimestamp2 = -5)) or ((ctimestamp1 < 0) and (cstring2 like '%b%'))) or (cdouble = cint)) or (cboolean1 is null and (cfloat < cint)))) (type: boolean)
+                    predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and (cboolean2 is not null and (cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))))) or ((UDFToDouble(ctimestamp2) = -5.0) or (((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or ((cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint)))))))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double)
-                      outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint)
-                        keys: ctimestamp1 (type: timestamp), cstring1 (type: string)
+                        aggregations: stddev_pop(_col2), avg(_col3), count(), min(_col4), var_samp(_col3), var_pop(_col5), avg(_col2), var_samp(_col5), avg(_col5), min(_col6), var_pop(_col3), stddev_pop(_col4), sum(_col2)
+                        keys: _col0 (type: timestamp), _col1 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
                         Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -2367,7 +2367,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
                 Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: timestamp), _col1 (type: string), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * (- _col4)) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) (type: double), (- (- _col2)) (type: double), ((- _col4) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col2 (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) - (((-26.28 - _col2) * (- _col2)) * (- _col4))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) - (((-26.28 - _col2) * (- _col2)) * (- _col4))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (_col2 * 10.175) (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / (- _col5)) (type: double), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) / _col7) (type: double), (- (- _col4)) (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * (- _col4))) % -26.28) (type: double), (- _col2) (type: double), _col3 (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint)
+                  expressions: _col0 (type: timestamp), _col1 (type: string), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col2 (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (_col2 * 10.175) (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / UDFToDouble((- _col5))) (type: double), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double), (- _col2) (type: double), _col3 (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint)
                   outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col38, _col4, _col5, _col7, _col8, _col9
                   Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
@@ -2672,15 +2672,15 @@ STAGE PLANS:
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (cboolean1 is not null and (((((cdouble < csmallint) and ((cboolean2 = cboolean1) and (cbigint <= -863.257))) or ((cint >= -257) and (cstring1 is not null and (cboolean1 >= 1)))) or cstring2 regexp 'b') or ((csmallint >= ctinyint) and ctimestamp2 is null))) (type: boolean)
+                    predicate: (cboolean1 is not null and (((cdouble < UDFToDouble(csmallint)) and ((cboolean2 = cboolean1) and
(UDFToDouble(cbigint) <= -863.257))) or (((cint >= -257) and (cstring1 is not null and (cboolean1 >= 1))) or (cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null))))) (type: boolean) Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) - outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) - keys: cboolean1 (type: boolean) + aggregations: max(_col1), sum(_col2), var_samp(_col3), avg(_col4), min(_col2), var_pop(_col2), sum(_col3), stddev_samp(_col5), stddev_pop(_col6), avg(_col3) + keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE @@ -2700,7 +2700,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2389 Data size: 73341 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: float), ((_col2 - 10.175) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: double), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * _col5) (type: double), (- _col1) (type: float), (_col7 - (-1.389 * _col5)) (type: double), _col9 (type: double), (- (_col7 - (-1.389 * _col5))) (type: double), _col10 (type: double), (- _col10) (type: double), (_col10 * _col7) (type: double), (-26.28 / _col1) (type: double), _col2 (type: bigint), (_col2 - 10.175) (type: double), _col3 (type: double), (_col3 % _col1) (type: double), (10.175 + (- _col1)) (type: double), _col4 (type: double) + expressions: _col0 (type: boolean), _col1 (type: float), ((UDFToDouble(_col2) - 10.175) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + UDFToDouble((- _col1)))) (type: double), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * UDFToDouble(_col5)) (type: double), (- _col1) (type: float), (UDFToDouble(_col7) - (-1.389 * UDFToDouble(_col5))) (type: double), _col9 (type: double), (- (UDFToDouble(_col7) - (-1.389 * UDFToDouble(_col5)))) (type: double), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (UDFToDouble(_col2) - 10.175) (type: double), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + UDFToDouble((- _col1))) (type: double), _col4 (type: double) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 2389 Data size: 73341 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator @@ -2915,10 +2915,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: i (type: int) - outputColumnNames: i + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(i) + aggregations: count(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -3093,10 +3093,10 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint + outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(ctinyint) + aggregations: count(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -3160,10 +3160,10 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) - outputColumnNames: cint + outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cint) + aggregations: count(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -3227,10 +3227,10 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float) - outputColumnNames: cfloat + outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cfloat) + aggregations: count(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -3294,10 +3294,10 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring1 (type: string) - outputColumnNames: cstring1 + outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cstring1) + aggregations: count(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -3361,10 +3361,10 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean) - outputColumnNames: cboolean1 + outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 168 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cboolean1) + aggregations: count(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out index daf6ad3..0e1e9d3 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -46,7 +46,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((csmallint = 418) or (csmallint = 12205)) or (csmallint = 10583)) 
(type: boolean) + predicate: ((csmallint = 418) or ((csmallint = 12205) or (csmallint = 10583))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) diff --git a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 78b0b37..b6c2b35 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -23,15 +23,19 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 cint (type: int) - 1 cint (type: int) + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -49,25 +53,33 @@ STAGE PLANS: Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 cint (type: int) - 1 cint (type: int) - outputColumnNames: _col2, _col17 - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Local Work: Map Reduce Local Work Execution mode: vectorized diff --git 
a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index b84a4c3..881641f 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -31,25 +31,33 @@ STAGE PLANS: Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 5 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -57,19 +65,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 cint (type: int) - 1 cint (type: int) - outputColumnNames: _col2, _col17 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index ed1ba4b..290db0d 100644 --- 
a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -644,10 +644,10 @@ STAGE PLANS: Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 + outputColumnNames: _col0 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + aggregations: min(_col0), max(_col0), count(_col0), count() mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE @@ -725,10 +725,10 @@ STAGE PLANS: Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 + outputColumnNames: _col0 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(ctimestamp1) + aggregations: sum(_col0) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -815,10 +815,10 @@ STAGE PLANS: Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 + outputColumnNames: _col0 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + aggregations: avg(_col0), variance(_col0), var_pop(_col0), var_samp(_col0), std(_col0), stddev(_col0), stddev_pop(_col0), stddev_samp(_col0) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE diff --git a/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out b/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out index c1f3de7..f6bfad2 100644 --- a/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out +++ b/ql/src/test/results/clientpositive/tez/cbo_subq_in.q.out @@ -69,6 +69,7 @@ select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) + order by p.p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem #### A masked pattern was here #### @@ -77,6 +78,7 @@ select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) + order by p.p_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@lineitem #### A masked pattern was here ####