diff --git a/ql/src/test/queries/clientpositive/union23.q b/ql/src/test/queries/clientpositive/union23.q index 052f34a..0574ceb 100644 --- a/ql/src/test/queries/clientpositive/union23.q +++ b/ql/src/test/queries/clientpositive/union23.q @@ -1,18 +1,17 @@ set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; diff --git a/ql/src/test/queries/clientpositive/union32.q b/ql/src/test/queries/clientpositive/union32.q index f47f0af..b18d484 100644 --- a/ql/src/test/queries/clientpositive/union32.q +++ b/ql/src/test/queries/clientpositive/union32.q @@ -12,8 +12,7 @@ EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL -SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key; +SELECT CAST(key AS BIGINT) AS key FROM t2) a; SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 diff --git a/ql/src/test/queries/clientpositive/union34.q b/ql/src/test/queries/clientpositive/union34.q index 2ab16de..dbefc40 100644 --- a/ql/src/test/queries/clientpositive/union34.q +++ b/ql/src/test/queries/clientpositive/union34.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS create table src10_1 (key string, value string); create table src10_2 (key string, value string); create table src10_3 (key string, value string); @@ -18,13 +19,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; set hive.auto.convert.join=false; -- When we do not convert the Join of sub1 and sub0 into a MapJoin, @@ -36,10 +37,10 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; diff --git a/ql/src/test/queries/clientpositive/union36.q b/ql/src/test/queries/clientpositive/union36.q index c38e7b1..b79ff0f 100644 --- a/ql/src/test/queries/clientpositive/union36.q +++ b/ql/src/test/queries/clientpositive/union36.q @@ -1,9 +1,10 @@ set hive.mapred.mode=nonstrict; set hive.cbo.enable=false; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y; +-- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y; +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u; diff --git a/ql/src/test/queries/clientpositive/unionDistinct_1.q b/ql/src/test/queries/clientpositive/unionDistinct_1.q index 0d53a96..5c52d9b 100644 --- a/ql/src/test/queries/clientpositive/unionDistinct_1.q +++ b/ql/src/test/queries/clientpositive/unionDistinct_1.q @@ -386,16 +386,14 @@ from ( select transform(key, value) using 'cat' as (key2, value2) from src UNION DISTINCT - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src UNION DISTINCT - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; -- union24.q @@ -874,8 +872,7 @@ EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT -SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key; +SELECT CAST(key AS BIGINT) AS key FROM t2) a; SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 @@ -1013,13 +1010,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; set hive.auto.convert.join=false; -- When we do not convert the Join of sub1 and sub0 into a MapJoin, @@ -1031,13 +1028,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; -- union4.q diff --git a/ql/src/test/queries/clientpositive/union_remove_1.q b/ql/src/test/queries/clientpositive/union_remove_1.q index 702f71a..0a69068 100644 --- a/ql/src/test/queries/clientpositive/union_remove_1.q +++ b/ql/src/test/queries/clientpositive/union_remove_1.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -42,4 +43,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_10.q b/ql/src/test/queries/clientpositive/union_remove_10.q index 508f354..7892e15 100644 --- a/ql/src/test/queries/clientpositive/union_remove_10.q +++ b/ql/src/test/queries/clientpositive/union_remove_10.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce @@ -55,4 +56,4 @@ select * FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_11.q b/ql/src/test/queries/clientpositive/union_remove_11.q index bdfbf66..fd41648 100644 --- a/ql/src/test/queries/clientpositive/union_remove_11.q +++ b/ql/src/test/queries/clientpositive/union_remove_11.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), @@ -55,4 +56,4 @@ select * FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_12.q b/ql/src/test/queries/clientpositive/union_remove_12.q index f6436f5..b665666 100644 --- a/ql/src/test/queries/clientpositive/union_remove_12.q +++ b/ql/src/test/queries/clientpositive/union_remove_12.q @@ -9,6 +9,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_13.q b/ql/src/test/queries/clientpositive/union_remove_13.q index b02451b..11077fd 100644 --- a/ql/src/test/queries/clientpositive/union_remove_13.q +++ b/ql/src/test/queries/clientpositive/union_remove_13.q @@ -9,6 +9,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_14.q b/ql/src/test/queries/clientpositive/union_remove_14.q index bec6226..b559b35 100644 --- a/ql/src/test/queries/clientpositive/union_remove_14.q +++ b/ql/src/test/queries/clientpositive/union_remove_14.q @@ -9,6 +9,7 @@ set hive.auto.convert.join=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), @@ -50,4 +51,4 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_15.q b/ql/src/test/queries/clientpositive/union_remove_15.q index e384739..43c7834 100644 --- a/ql/src/test/queries/clientpositive/union_remove_15.q +++ b/ql/src/test/queries/clientpositive/union_remove_15.q @@ -10,6 +10,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. @@ -50,5 +51,5 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '1' order by key, `values`; -select * from outputTbl1 where ds = '2' order by key, `values`; +select * from outputTbl1 where ds = '1'; +select * from outputTbl1 where ds = '2'; diff --git a/ql/src/test/queries/clientpositive/union_remove_16.q b/ql/src/test/queries/clientpositive/union_remove_16.q index 41305bf..eca8aeb 100644 --- a/ql/src/test/queries/clientpositive/union_remove_16.q +++ b/ql/src/test/queries/clientpositive/union_remove_16.q @@ -11,6 +11,7 @@ set mapred.input.dir.recursive=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. @@ -49,5 +50,5 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '1' order by key, `values`; -select * from outputTbl1 where ds = '2' order by key, `values`; +select * from outputTbl1 where ds = '1'; +select * from outputTbl1 where ds = '2'; diff --git a/ql/src/test/queries/clientpositive/union_remove_17.q b/ql/src/test/queries/clientpositive/union_remove_17.q index 8f91d03..59a3a9c 100644 --- a/ql/src/test/queries/clientpositive/union_remove_17.q +++ b/ql/src/test/queries/clientpositive/union_remove_17.q @@ -10,6 +10,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. @@ -46,5 +47,5 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '1' order by key, `values`; -select * from outputTbl1 where ds = '2' order by key, `values`; +select * from outputTbl1 where ds = '1'; +select * from outputTbl1 where ds = '2'; diff --git a/ql/src/test/queries/clientpositive/union_remove_18.q b/ql/src/test/queries/clientpositive/union_remove_18.q index ad38742..98ee7d0 100644 --- a/ql/src/test/queries/clientpositive/union_remove_18.q +++ b/ql/src/test/queries/clientpositive/union_remove_18.q @@ -10,6 +10,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -49,6 +50,6 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '11' order by key, `values`; -select * from outputTbl1 where ds = '18' order by key, `values`; -select * from outputTbl1 where ds is not null order by key, `values`, ds; +select * from outputTbl1 where ds = '11'; +select * from outputTbl1 where ds = '18'; +select * from outputTbl1 where ds is not null; diff --git a/ql/src/test/queries/clientpositive/union_remove_19.q b/ql/src/test/queries/clientpositive/union_remove_19.q index 34f8b5a..9e47254 100644 --- a/ql/src/test/queries/clientpositive/union_remove_19.q +++ b/ql/src/test/queries/clientpositive/union_remove_19.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -90,4 +91,4 @@ FROM ( ) b where b.key >= 7; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_2.q b/ql/src/test/queries/clientpositive/union_remove_2.q index 274608c..2ebc1ad 100644 --- a/ql/src/test/queries/clientpositive/union_remove_2.q +++ b/ql/src/test/queries/clientpositive/union_remove_2.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. @@ -47,5 +48,5 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_20.q b/ql/src/test/queries/clientpositive/union_remove_20.q index 93a49f3..700ee4d 100644 --- a/ql/src/test/queries/clientpositive/union_remove_20.q +++ b/ql/src/test/queries/clientpositive/union_remove_20.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot @@ -43,4 +44,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_21.q b/ql/src/test/queries/clientpositive/union_remove_21.q index fa8ff27..1b1472e 100644 --- a/ql/src/test/queries/clientpositive/union_remove_21.q +++ b/ql/src/test/queries/clientpositive/union_remove_21.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot @@ -43,4 +44,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_22.q b/ql/src/test/queries/clientpositive/union_remove_22.q index 0e97cae..d4d3cbc 100644 --- a/ql/src/test/queries/clientpositive/union_remove_22.q +++ b/ql/src/test/queries/clientpositive/union_remove_22.q @@ -63,4 +63,4 @@ FROM ( ) a; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_23.q b/ql/src/test/queries/clientpositive/union_remove_23.q index 436719d..98ae503 100644 --- a/ql/src/test/queries/clientpositive/union_remove_23.q +++ b/ql/src/test/queries/clientpositive/union_remove_23.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -45,4 +46,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_24.q b/ql/src/test/queries/clientpositive/union_remove_24.q index 3a1e225..7ed80d1 100644 --- a/ql/src/test/queries/clientpositive/union_remove_24.q +++ b/ql/src/test/queries/clientpositive/union_remove_24.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -41,4 +42,4 @@ SELECT * FROM desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_25.q b/ql/src/test/queries/clientpositive/union_remove_25.q index d70adb9..78fa9aa 100644 --- a/ql/src/test/queries/clientpositive/union_remove_25.q +++ b/ql/src/test/queries/clientpositive/union_remove_25.q @@ -8,6 +8,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -45,7 +46,7 @@ FROM ( desc formatted outputTbl1 partition(ds='2004'); set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; explain insert overwrite table outputTbl2 partition(ds) diff --git a/ql/src/test/queries/clientpositive/union_remove_26.q b/ql/src/test/queries/clientpositive/union_remove_26.q index d35d4e2..4e77d73 100644 --- a/ql/src/test/queries/clientpositive/union_remove_26.q +++ b/ql/src/test/queries/clientpositive/union_remove_26.q @@ -108,4 +108,4 @@ select count(*) from ( UNION ALL SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 UNION ALL - SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t; \ No newline at end of file + SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t; diff --git a/ql/src/test/queries/clientpositive/union_remove_3.q b/ql/src/test/queries/clientpositive/union_remove_3.q index 2e6e3e6..b0f63fc 100644 --- a/ql/src/test/queries/clientpositive/union_remove_3.q +++ b/ql/src/test/queries/clientpositive/union_remove_3.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. @@ -47,5 +48,5 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_4.q b/ql/src/test/queries/clientpositive/union_remove_4.q index a08bcc1..9ee6282 100644 --- a/ql/src/test/queries/clientpositive/union_remove_4.q +++ b/ql/src/test/queries/clientpositive/union_remove_4.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set mapred.input.dir.recursive=true; set hive.merge.smallfiles.avgsize=1; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -43,4 +44,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_5.q b/ql/src/test/queries/clientpositive/union_remove_5.q index 4ac3edf..5d3c482 100644 --- a/ql/src/test/queries/clientpositive/union_remove_5.q +++ b/ql/src/test/queries/clientpositive/union_remove_5.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_6.q b/ql/src/test/queries/clientpositive/union_remove_6.q index f37f409..23eb760 100644 --- a/ql/src/test/queries/clientpositive/union_remove_6.q +++ b/ql/src/test/queries/clientpositive/union_remove_6.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. @@ -38,5 +39,5 @@ insert overwrite table outputTbl1 select * insert overwrite table outputTbl2 select *; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; -select * from outputTbl2 order by key, `values`;; +select * from outputTbl1; +select * from outputTbl2; diff --git a/ql/src/test/queries/clientpositive/union_remove_6_subq.q b/ql/src/test/queries/clientpositive/union_remove_6_subq.q index 6853728..3607a52 100644 --- a/ql/src/test/queries/clientpositive/union_remove_6_subq.q +++ b/ql/src/test/queries/clientpositive/union_remove_6_subq.q @@ -43,8 +43,8 @@ insert overwrite table outputTbl1 select * insert overwrite table outputTbl2 select *; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; -select * from outputTbl2 order by key, `values`; +select * from outputTbl1; +select * from outputTbl2; -- The following queries guarantee the correctness. explain diff --git a/ql/src/test/queries/clientpositive/union_remove_7.q b/ql/src/test/queries/clientpositive/union_remove_7.q index cd6a75a..5e0d14b 100644 --- a/ql/src/test/queries/clientpositive/union_remove_7.q +++ b/ql/src/test/queries/clientpositive/union_remove_7.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -44,4 +45,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_8.q b/ql/src/test/queries/clientpositive/union_remove_8.q index 93aabe6..1e049ba 100644 --- a/ql/src/test/queries/clientpositive/union_remove_8.q +++ b/ql/src/test/queries/clientpositive/union_remove_8.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_9.q b/ql/src/test/queries/clientpositive/union_remove_9.q index 1bb8d6d..b819560 100644 --- a/ql/src/test/queries/clientpositive/union_remove_9.q +++ b/ql/src/test/queries/clientpositive/union_remove_9.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. @@ -53,4 +54,4 @@ select * FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_script.q b/ql/src/test/queries/clientpositive/union_script.q index 7687104..e5b160f 100644 --- a/ql/src/test/queries/clientpositive/union_script.q +++ b/ql/src/test/queries/clientpositive/union_script.q @@ -1,8 +1,9 @@ set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS select * from ( - select transform(key) using 'cat' as cola from src)s order by cola; + select transform(key) using 'cat' as cola from src)s; select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola; + select transform(key) using 'cat' as cola from src) s; diff --git a/ql/src/test/queries/clientpositive/union_type_chk.q b/ql/src/test/queries/clientpositive/union_type_chk.q index a25aeda..ff2e7cf 100644 --- a/ql/src/test/queries/clientpositive/union_type_chk.q +++ b/ql/src/test/queries/clientpositive/union_type_chk.q @@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.cbo.enable=false; -set hive.mapred.mode=nonstrict; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y; +-- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y; +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u; diff --git a/ql/src/test/queries/clientpositive/union_view.q b/ql/src/test/queries/clientpositive/union_view.q index fa39450..c4f63e6 100644 --- a/ql/src/test/queries/clientpositive/union_view.q +++ b/ql/src/test/queries/clientpositive/union_view.q @@ -63,12 +63,13 @@ SELECT key, value, ds FROM src_union_3 EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1'; EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='2'; EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='3'; -EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL order by ds; +EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL; SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1'; SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='2'; SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='3'; -SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL order by ds; +-- SORT_BEFORE_DIFF +SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL; EXPLAIN SELECT count(1) from src_union_view WHERE ds ='1'; EXPLAIN SELECT count(1) from src_union_view WHERE ds ='2'; diff --git a/ql/src/test/results/clientpositive/spark/union23.q.out b/ql/src/test/results/clientpositive/spark/union23.q.out index 01a142b..0e08569 100644 --- a/ql/src/test/results/clientpositive/spark/union23.q.out +++ b/ql/src/test/results/clientpositive/spark/union23.q.out @@ -1,20 +1,20 @@ -PREHOOK: query: explain +PREHOOK: query: -- SORT_QUERY_RESULTS +explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- SORT_QUERY_RESULTS +explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -23,8 +23,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1), Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,11 +41,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + File Output Operator + compressed: false Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map 3 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 Map Operator Tree: TableScan alias: src @@ -56,23 +57,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + File Output Operator + compressed: false Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -86,7 +77,6 @@ from ( from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -96,7 +86,6 @@ from ( from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union32.q.out b/ql/src/test/results/clientpositive/spark/union32.q.out index 09fb01e..16cb243 100644 --- a/ql/src/test/results/clientpositive/spark/union32.q.out +++ b/ql/src/test/results/clientpositive/spark/union32.q.out @@ -38,7 +38,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key PREHOOK: type: QUERY POSTHOOK: query: -- Test simple union with double EXPLAIN @@ -46,7 +45,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -55,8 +53,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1), Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,11 +64,14 @@ STAGE PLANS: expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Map 3 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 Map Operator Tree: TableScan alias: t2 @@ -81,23 +80,13 @@ STAGE PLANS: expressions: UDFToDouble(UDFToLong(key)) (type: double) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/union34.q.out b/ql/src/test/results/clientpositive/spark/union34.q.out index 1a50c3c..c49c436 100644 --- a/ql/src/test/results/clientpositive/spark/union34.q.out +++ b/ql/src/test/results/clientpositive/spark/union34.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: create table src10_1 (key string, value string) +PREHOOK: query: -- SORT_QUERY_RESULTS +create table src10_1 (key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@src10_1 -POSTHOOK: query: create table src10_1 (key string, value string) +POSTHOOK: query: -- SORT_QUERY_RESULTS +create table src10_1 (key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@src10_1 @@ -67,7 +69,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, -- we can use a single MR job to evaluate this entire query. @@ -76,7 +78,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -88,7 +90,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: src10_2 @@ -109,8 +111,6 @@ STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1), Map 4 (SORT, 1), Map 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -133,16 +133,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Map 4 + Map 3 Map Operator Tree: TableScan alias: src10_3 @@ -151,12 +153,14 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 5 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 4 Map Operator Tree: TableScan alias: src10_4 @@ -165,24 +169,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -194,7 +187,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -205,7 +198,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 @@ -251,7 +244,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, -- we need to use two MR jobs to evaluate this query. @@ -262,7 +255,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -272,8 +265,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (SORT, 1), Map 6 (SORT, 1), Reducer 2 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -294,7 +286,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: src10_2 @@ -311,7 +303,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 4 Map Operator Tree: TableScan alias: src10_3 @@ -320,12 +312,14 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 6 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 5 Map Operator Tree: TableScan alias: src10_4 @@ -334,11 +328,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 2 Reduce Operator Tree: Join Operator @@ -346,17 +342,6 @@ STAGE PLANS: Inner Join 0 to 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE @@ -375,7 +360,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -386,7 +371,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out index 6d88345..ff18e4a 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -190,11 +192,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out index 7b4b0c4..ded8b40 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce -- job), followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce -- job), followed by select star and a file sink. @@ -273,11 +275,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_11.q.out b/ql/src/test/results/clientpositive/spark/union_remove_11.q.out index 5d77d06..0fe1340 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_11.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_11.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), -- followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), -- followed by select star and a file sink. @@ -263,11 +265,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_12.q.out b/ql/src/test/results/clientpositive/spark/union_remove_12.q.out index 6a7fe69..7487cd3 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_12.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_12.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union optimization is applied, and the union is removed. @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union optimization is applied, and the union is removed. @@ -271,11 +273,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_13.q.out b/ql/src/test/results/clientpositive/spark/union_remove_13.q.out index 2d031f9..550c24b 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_13.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_13.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union selectstar optimization should be performed, and the union should be removed. @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union selectstar optimization should be performed, and the union should be removed. @@ -297,11 +299,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -313,8 +315,8 @@ POSTHOOK: Input: default@outputtbl1 3 13 7 1 7 17 -8 2 8 18 8 18 +8 2 8 28 8 28 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_14.q.out b/ql/src/test/results/clientpositive/spark/union_remove_14.q.out index 0be3d86..9002223 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_14.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_14.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), -- followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), -- followed by select star and a file sink. @@ -273,11 +275,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out index c0ad4e6..c4d1542 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -220,12 +222,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -235,12 +237,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 3 1 1 7 1 1 8 2 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out index d02109a..961291b 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -260,12 +262,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -275,12 +277,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 3 1 1 7 1 1 8 2 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_17.q.out b/ql/src/test/results/clientpositive/spark/union_remove_17.q.out index c8047a9..baed263 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_17.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_17.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need for this optimization, since the query is a map-only query. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need for this optimization, since the query is a map-only query. @@ -192,12 +194,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -208,12 +210,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 7 1 1 8 1 1 8 1 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out index 6a6de92..535d085 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, ds string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -234,31 +236,31 @@ ds=13 ds=17 ds=18 ds=28 -PREHOOK: query: select * from outputTbl1 where ds = '11' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '11' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=11 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '11' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '11' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=11 #### A masked pattern was here #### 1 1 11 1 1 11 -PREHOOK: query: select * from outputTbl1 where ds = '18' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '18' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=18 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '18' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '18' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=18 #### A masked pattern was here #### 8 1 18 8 1 18 -PREHOOK: query: select * from outputTbl1 where ds is not null order by key, `values`, ds +PREHOOK: query: select * from outputTbl1 where ds is not null PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=11 @@ -268,7 +270,7 @@ PREHOOK: Input: default@outputtbl1@ds=17 PREHOOK: Input: default@outputtbl1@ds=18 PREHOOK: Input: default@outputtbl1@ds=28 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds is not null order by key, `values`, ds +POSTHOOK: query: select * from outputTbl1 where ds is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=11 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out index 59063a7..8c06c64 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -484,11 +486,11 @@ POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out index 4667e70..755ff3f 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -217,11 +219,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out index b7682a9..bec77a8 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot -- be removed. @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot -- be removed. @@ -200,11 +202,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out index c9bcf36..f5a1caa 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot -- be removed. @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot -- be removed. @@ -186,11 +188,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_22.q.out b/ql/src/test/results/clientpositive/spark/union_remove_22.q.out index 90caa89..c762002 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_22.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_22.q.out @@ -347,11 +347,11 @@ POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl1.values2 EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_23.q.out b/ql/src/test/results/clientpositive/spark/union_remove_23.q.out index 3d51af5..b351893 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_23.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_23.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. One of the sub-queries @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. One of the sub-queries @@ -253,11 +255,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out index 94f71ac..c66691e 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -11,7 +12,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -196,11 +198,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out index eb95cad..ba9d4d1 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -212,12 +214,12 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2004 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2004 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_3.q.out b/ql/src/test/results/clientpositive/spark/union_remove_3.q.out index 66675ce..7692443 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_3.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_3.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->remove->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. -- There is no need for any optimization, since the whole query can be processed in @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->remove->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. -- There is no need for any optimization, since the whole query can be processed in @@ -207,11 +209,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out index 1bf917a..3ab62c8 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -240,11 +242,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out index aabb6c4..9a9caf6 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -269,11 +271,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out index d677d1b..1bdeb09 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -9,7 +10,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -191,11 +193,11 @@ POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(n POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -209,11 +211,11 @@ POSTHOOK: Input: default@outputtbl1 7 1 8 2 8 2 -PREHOOK: query: select * from outputTbl2 order by key, `values` +PREHOOK: query: select * from outputTbl2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl2 order by key, `values` +POSTHOOK: query: select * from outputTbl2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out index dac8247..10de07d 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out @@ -203,11 +203,11 @@ POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(n POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -221,11 +221,11 @@ POSTHOOK: Input: default@outputtbl1 7 1 8 2 8 2 -PREHOOK: query: select * from outputTbl2 order by key, `values` +PREHOOK: query: select * from outputTbl2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl2 order by key, `values` +POSTHOOK: query: select * from outputTbl2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out index 6af69e0..585b61e 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -194,11 +196,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out index f19afad..17079a1 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -221,11 +223,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out index 2e2abf9..0b46bd7 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. -- There is no need for the outer union. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. -- There is no need for the outer union. @@ -277,11 +279,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_script.q.out b/ql/src/test/results/clientpositive/spark/union_script.q.out index e044f63..44ea01b 100644 --- a/ql/src/test/results/clientpositive/spark/union_script.q.out +++ b/ql/src/test/results/clientpositive/spark/union_script.q.out @@ -1,10 +1,12 @@ -PREHOOK: query: select * from ( - select transform(key) using 'cat' as cola from src)s order by cola +PREHOOK: query: -- SORT_QUERY_RESULTS +select * from ( + select transform(key) using 'cat' as cola from src)s PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select * from ( - select transform(key) using 'cat' as cola from src)s order by cola +POSTHOOK: query: -- SORT_QUERY_RESULTS +select * from ( + select transform(key) using 'cat' as cola from src)s POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -511,14 +513,14 @@ POSTHOOK: Input: default@src PREHOOK: query: select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola + select transform(key) using 'cat' as cola from src) s PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola + select transform(key) using 'cat' as cola from src) s POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/union_view.q.out b/ql/src/test/results/clientpositive/spark/union_view.q.out index 892cc6f..a5970c0 100644 --- a/ql/src/test/results/clientpositive/spark/union_view.q.out +++ b/ql/src/test/results/clientpositive/spark/union_view.q.out @@ -406,8 +406,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1), Map 3 (SORT, 1), Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -424,15 +422,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 Map Operator Tree: TableScan alias: src_union_2 @@ -446,15 +446,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 4 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 3 Map Operator Tree: TableScan alias: src_union_3 @@ -468,27 +470,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: 86 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -501,9 +492,9 @@ STAGE PLANS: 86 val_86 2 86 val_86 3 86 val_86 3 -86 val_86 1 86 val_86 2 86 val_86 2 +86 val_86 1 86 val_86 3 86 val_86 3 STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out index dd78248..2c8e57f 100644 --- a/ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out @@ -7174,7 +7174,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY POSTHOOK: query: -- union23.q @@ -7185,7 +7184,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -7197,9 +7195,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -7228,7 +7225,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -7254,16 +7251,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -7286,7 +7273,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -7296,7 +7282,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -12823,7 +12808,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key PREHOOK: type: QUERY POSTHOOK: query: -- Test simple union with double EXPLAIN @@ -12831,7 +12815,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -12843,9 +12826,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -12867,7 +12849,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 4 Map Operator Tree: TableScan alias: t2 @@ -12893,16 +12875,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -13992,7 +13964,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, -- we can use a single MR job to evaluate this entire query. @@ -14001,7 +13973,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -14012,12 +13984,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 6 <- Union 7 (CONTAINS) - Map 9 <- Union 7 (CONTAINS) + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 5 <- Union 6 (CONTAINS) + Map 8 <- Union 6 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 8 <- Union 2 (CONTAINS), Union 7 (SIMPLE_EDGE) + Reducer 7 <- Union 2 (CONTAINS), Union 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -14040,7 +14011,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Group By Operator @@ -14053,7 +14024,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 4 Map Operator Tree: TableScan alias: src10_2 @@ -14070,7 +14041,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 5 Map Operator Tree: TableScan alias: src10_3 @@ -14089,7 +14060,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 9 + Map 8 Map Operator Tree: TableScan alias: src10_4 @@ -14115,17 +14086,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE @@ -14133,7 +14093,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -14152,8 +14112,8 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE Union 2 Vertex: Union 2 - Union 7 - Vertex: Union 7 + Union 6 + Vertex: Union 6 Stage: Stage-0 Fetch Operator @@ -14165,7 +14125,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -14176,7 +14136,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 @@ -14202,7 +14162,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, -- we need to use two MR jobs to evaluate this query. @@ -14213,7 +14173,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -14224,12 +14184,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 10 <- Union 8 (CONTAINS) - Map 7 <- Union 8 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Map 6 <- Union 7 (CONTAINS) + Map 9 <- Union 7 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 9 <- Union 3 (CONTAINS), Union 8 (SIMPLE_EDGE) + Reducer 8 <- Union 3 (CONTAINS), Union 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -14250,26 +14209,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 10 - Map Operator Tree: - TableScan - alias: src10_4 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 5 Map Operator Tree: TableScan alias: src10_2 @@ -14286,7 +14226,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 6 Map Operator Tree: TableScan alias: src10_3 @@ -14305,6 +14245,25 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: src10_4 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -14332,17 +14291,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 5 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE @@ -14350,7 +14298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 8 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -14369,8 +14317,8 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE Union 3 Vertex: Union 3 - Union 8 - Vertex: Union 8 + Union 7 + Vertex: Union 7 Stage: Stage-0 Fetch Operator @@ -14382,7 +14330,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -14393,7 +14341,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 diff --git a/ql/src/test/results/clientpositive/tez/union_type_chk.q.out b/ql/src/test/results/clientpositive/tez/union_type_chk.q.out index 12f060b..1eb0182 100644 --- a/ql/src/test/results/clientpositive/tez/union_type_chk.q.out +++ b/ql/src/test/results/clientpositive/tez/union_type_chk.q.out @@ -1,28 +1,30 @@ -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0.4999999900000002 +0.4999999900000002 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 -0.4999999900000002 -0.4999999900000002 -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL -NULL 0.25 0.25 0.25 0.25 +NULL +NULL diff --git a/ql/src/test/results/clientpositive/union23.q.out b/ql/src/test/results/clientpositive/union23.q.out index e40b88c..4435c93 100644 --- a/ql/src/test/results/clientpositive/union23.q.out +++ b/ql/src/test/results/clientpositive/union23.q.out @@ -1,20 +1,20 @@ -PREHOOK: query: explain +PREHOOK: query: -- SORT_QUERY_RESULTS +explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- SORT_QUERY_RESULTS +explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -40,10 +40,13 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + File Output Operator + compressed: false Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -53,22 +56,13 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + File Output Operator + compressed: false Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -82,7 +76,6 @@ from ( from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -92,7 +85,6 @@ from ( from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union32.q.out b/ql/src/test/results/clientpositive/union32.q.out index f61a313..136a1c3 100644 --- a/ql/src/test/results/clientpositive/union32.q.out +++ b/ql/src/test/results/clientpositive/union32.q.out @@ -38,7 +38,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key PREHOOK: type: QUERY POSTHOOK: query: -- Test simple union with double EXPLAIN @@ -46,7 +45,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -65,10 +63,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -78,22 +79,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/union34.q.out b/ql/src/test/results/clientpositive/union34.q.out index 06f75e7..8d7846c 100644 --- a/ql/src/test/results/clientpositive/union34.q.out +++ b/ql/src/test/results/clientpositive/union34.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: create table src10_1 (key string, value string) +PREHOOK: query: -- SORT_QUERY_RESULTS +create table src10_1 (key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@src10_1 -POSTHOOK: query: create table src10_1 (key string, value string) +POSTHOOK: query: -- SORT_QUERY_RESULTS +create table src10_1 (key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@src10_1 @@ -67,7 +69,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, -- we can use a single MR job to evaluate this entire query. @@ -76,7 +78,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-7 is a root stage @@ -119,11 +121,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -133,11 +137,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -158,25 +164,15 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -188,7 +184,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -199,7 +195,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 @@ -245,7 +241,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, -- we need to use two MR jobs to evaluate this query. @@ -256,7 +252,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -320,11 +316,13 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_3 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -334,11 +332,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -348,23 +348,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -376,7 +366,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -387,7 +377,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 diff --git a/ql/src/test/results/clientpositive/union36.q.out b/ql/src/test/results/clientpositive/union36.q.out index 12f060b..1eb0182 100644 --- a/ql/src/test/results/clientpositive/union36.q.out +++ b/ql/src/test/results/clientpositive/union36.q.out @@ -1,28 +1,30 @@ -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0.4999999900000002 +0.4999999900000002 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 -0.4999999900000002 -0.4999999900000002 -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL -NULL 0.25 0.25 0.25 0.25 +NULL +NULL diff --git a/ql/src/test/results/clientpositive/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/unionDistinct_1.q.out index 1d32a06..8c9ce5e 100644 --- a/ql/src/test/results/clientpositive/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/unionDistinct_1.q.out @@ -7795,7 +7795,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY POSTHOOK: query: -- union23.q @@ -7806,12 +7805,10 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -7870,26 +7867,6 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7908,7 +7885,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -7918,7 +7894,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -13784,7 +13759,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key PREHOOK: type: QUERY POSTHOOK: query: -- Test simple union with double EXPLAIN @@ -13792,12 +13766,10 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -13849,26 +13821,6 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -14938,7 +14890,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, -- we can use a single MR job to evaluate this entire query. @@ -14947,17 +14899,16 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-8 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 is a root stage + Stage-7 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -15011,7 +14962,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0-subquery1:$hdt$_0-subquery1:$hdt$_0:src10_1 @@ -15090,27 +15041,6 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -15127,7 +15057,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -15138,7 +15068,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 @@ -15164,7 +15094,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, -- we need to use two MR jobs to evaluate this query. @@ -15175,14 +15105,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -15272,34 +15201,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -15363,7 +15271,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -15374,7 +15282,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 diff --git a/ql/src/test/results/clientpositive/union_remove_1.q.out b/ql/src/test/results/clientpositive/union_remove_1.q.out index 1bd471d..9b35b5f 100644 --- a/ql/src/test/results/clientpositive/union_remove_1.q.out +++ b/ql/src/test/results/clientpositive/union_remove_1.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -206,11 +208,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_10.q.out b/ql/src/test/results/clientpositive/union_remove_10.q.out index 14645f0..e98a907 100644 --- a/ql/src/test/results/clientpositive/union_remove_10.q.out +++ b/ql/src/test/results/clientpositive/union_remove_10.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce -- job), followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce -- job), followed by select star and a file sink. @@ -265,11 +267,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_11.q.out b/ql/src/test/results/clientpositive/union_remove_11.q.out index 5696383..9820267 100644 --- a/ql/src/test/results/clientpositive/union_remove_11.q.out +++ b/ql/src/test/results/clientpositive/union_remove_11.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), -- followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), -- followed by select star and a file sink. @@ -254,11 +256,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_12.q.out b/ql/src/test/results/clientpositive/union_remove_12.q.out index 1aa21eb..46b6895 100644 --- a/ql/src/test/results/clientpositive/union_remove_12.q.out +++ b/ql/src/test/results/clientpositive/union_remove_12.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union optimization is applied, and the union is removed. @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union optimization is applied, and the union is removed. @@ -253,11 +255,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_13.q.out b/ql/src/test/results/clientpositive/union_remove_13.q.out index 1bf411f..4639602 100644 --- a/ql/src/test/results/clientpositive/union_remove_13.q.out +++ b/ql/src/test/results/clientpositive/union_remove_13.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union selectstar optimization should be performed, and the union should be removed. @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union selectstar optimization should be performed, and the union should be removed. @@ -276,11 +278,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -292,8 +294,8 @@ POSTHOOK: Input: default@outputtbl1 3 13 7 1 7 17 -8 2 8 18 8 18 +8 2 8 28 8 28 diff --git a/ql/src/test/results/clientpositive/union_remove_14.q.out b/ql/src/test/results/clientpositive/union_remove_14.q.out index 6c89b92..fcb091e 100644 --- a/ql/src/test/results/clientpositive/union_remove_14.q.out +++ b/ql/src/test/results/clientpositive/union_remove_14.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), -- followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), -- followed by select star and a file sink. @@ -255,11 +257,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_15.q.out b/ql/src/test/results/clientpositive/union_remove_15.q.out index a259df8..43a75af 100644 --- a/ql/src/test/results/clientpositive/union_remove_15.q.out +++ b/ql/src/test/results/clientpositive/union_remove_15.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -236,12 +238,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -251,12 +253,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 3 1 1 7 1 1 8 2 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/union_remove_16.q.out b/ql/src/test/results/clientpositive/union_remove_16.q.out index c7a08f3..174a1a0 100644 --- a/ql/src/test/results/clientpositive/union_remove_16.q.out +++ b/ql/src/test/results/clientpositive/union_remove_16.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -268,12 +270,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -283,12 +285,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 3 1 1 7 1 1 8 2 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/union_remove_17.q.out b/ql/src/test/results/clientpositive/union_remove_17.q.out index 688e365..5cde83e 100644 --- a/ql/src/test/results/clientpositive/union_remove_17.q.out +++ b/ql/src/test/results/clientpositive/union_remove_17.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need for this optimization, since the query is a map-only query. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need for this optimization, since the query is a map-only query. @@ -191,12 +193,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -207,12 +209,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 7 1 1 8 1 1 8 1 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/union_remove_18.q.out b/ql/src/test/results/clientpositive/union_remove_18.q.out index 96daa12..d5b5a17 100644 --- a/ql/src/test/results/clientpositive/union_remove_18.q.out +++ b/ql/src/test/results/clientpositive/union_remove_18.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, ds string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -250,31 +252,31 @@ ds=13 ds=17 ds=18 ds=28 -PREHOOK: query: select * from outputTbl1 where ds = '11' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '11' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=11 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '11' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '11' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=11 #### A masked pattern was here #### 1 1 11 1 1 11 -PREHOOK: query: select * from outputTbl1 where ds = '18' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '18' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=18 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '18' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '18' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=18 #### A masked pattern was here #### 8 1 18 8 1 18 -PREHOOK: query: select * from outputTbl1 where ds is not null order by key, `values`, ds +PREHOOK: query: select * from outputTbl1 where ds is not null PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=11 @@ -284,7 +286,7 @@ PREHOOK: Input: default@outputtbl1@ds=17 PREHOOK: Input: default@outputtbl1@ds=18 PREHOOK: Input: default@outputtbl1@ds=28 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds is not null order by key, `values`, ds +POSTHOOK: query: select * from outputTbl1 where ds is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=11 diff --git a/ql/src/test/results/clientpositive/union_remove_19.q.out b/ql/src/test/results/clientpositive/union_remove_19.q.out index 1a40efa..20f7e0b 100644 --- a/ql/src/test/results/clientpositive/union_remove_19.q.out +++ b/ql/src/test/results/clientpositive/union_remove_19.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -538,11 +540,11 @@ POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_2.q.out b/ql/src/test/results/clientpositive/union_remove_2.q.out index e5de3c6..b889be5 100644 --- a/ql/src/test/results/clientpositive/union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/union_remove_2.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -217,11 +219,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_20.q.out b/ql/src/test/results/clientpositive/union_remove_20.q.out index 96d76d4..746cbf3 100644 --- a/ql/src/test/results/clientpositive/union_remove_20.q.out +++ b/ql/src/test/results/clientpositive/union_remove_20.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot -- be removed. @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot -- be removed. @@ -216,11 +218,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_21.q.out b/ql/src/test/results/clientpositive/union_remove_21.q.out index 1356777..c143cc8 100644 --- a/ql/src/test/results/clientpositive/union_remove_21.q.out +++ b/ql/src/test/results/clientpositive/union_remove_21.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot -- be removed. @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot -- be removed. @@ -200,11 +202,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_22.q.out b/ql/src/test/results/clientpositive/union_remove_22.q.out index 933a0e8..d269270 100644 --- a/ql/src/test/results/clientpositive/union_remove_22.q.out +++ b/ql/src/test/results/clientpositive/union_remove_22.q.out @@ -379,11 +379,11 @@ POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl1.values2 EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_23.q.out b/ql/src/test/results/clientpositive/union_remove_23.q.out index b00e5d1..585cdbb 100644 --- a/ql/src/test/results/clientpositive/union_remove_23.q.out +++ b/ql/src/test/results/clientpositive/union_remove_23.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. One of the sub-queries @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. One of the sub-queries @@ -256,11 +258,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_24.q.out b/ql/src/test/results/clientpositive/union_remove_24.q.out index 95bf66b..8e266fd 100644 --- a/ql/src/test/results/clientpositive/union_remove_24.q.out +++ b/ql/src/test/results/clientpositive/union_remove_24.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -11,7 +12,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -212,11 +214,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_25.q.out b/ql/src/test/results/clientpositive/union_remove_25.q.out index 3869735..2785022 100644 --- a/ql/src/test/results/clientpositive/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/union_remove_25.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -228,12 +230,12 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2004 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2004 diff --git a/ql/src/test/results/clientpositive/union_remove_3.q.out b/ql/src/test/results/clientpositive/union_remove_3.q.out index a95a48e..5e117bd 100644 --- a/ql/src/test/results/clientpositive/union_remove_3.q.out +++ b/ql/src/test/results/clientpositive/union_remove_3.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->remove->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. -- There is no need for any optimization, since the whole query can be processed in @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->remove->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. -- There is no need for any optimization, since the whole query can be processed in @@ -206,11 +208,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_4.q.out b/ql/src/test/results/clientpositive/union_remove_4.q.out index 818ae80..0004ed0 100644 --- a/ql/src/test/results/clientpositive/union_remove_4.q.out +++ b/ql/src/test/results/clientpositive/union_remove_4.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -250,11 +252,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_5.q.out b/ql/src/test/results/clientpositive/union_remove_5.q.out index d6c2b99..f88d1d2 100644 --- a/ql/src/test/results/clientpositive/union_remove_5.q.out +++ b/ql/src/test/results/clientpositive/union_remove_5.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -263,11 +265,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_6.q.out b/ql/src/test/results/clientpositive/union_remove_6.q.out index 1b076c2..0a80e63 100644 --- a/ql/src/test/results/clientpositive/union_remove_6.q.out +++ b/ql/src/test/results/clientpositive/union_remove_6.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -9,7 +10,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -230,11 +232,11 @@ POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(n POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -248,11 +250,11 @@ POSTHOOK: Input: default@outputtbl1 7 1 8 2 8 2 -PREHOOK: query: select * from outputTbl2 order by key, `values` +PREHOOK: query: select * from outputTbl2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl2 order by key, `values` +POSTHOOK: query: select * from outputTbl2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out index b1e79e7..6439a0f 100644 --- a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -242,11 +242,11 @@ POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(n POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -260,11 +260,11 @@ POSTHOOK: Input: default@outputtbl1 7 1 8 2 8 2 -PREHOOK: query: select * from outputTbl2 order by key, `values` +PREHOOK: query: select * from outputTbl2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl2 order by key, `values` +POSTHOOK: query: select * from outputTbl2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_7.q.out b/ql/src/test/results/clientpositive/union_remove_7.q.out index 5541cb0..0a2dc62 100644 --- a/ql/src/test/results/clientpositive/union_remove_7.q.out +++ b/ql/src/test/results/clientpositive/union_remove_7.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -210,11 +212,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_8.q.out b/ql/src/test/results/clientpositive/union_remove_8.q.out index 22e6f15..3003b3c 100644 --- a/ql/src/test/results/clientpositive/union_remove_8.q.out +++ b/ql/src/test/results/clientpositive/union_remove_8.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -221,11 +223,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_9.q.out b/ql/src/test/results/clientpositive/union_remove_9.q.out index 70b5ee9..deb2a34 100644 --- a/ql/src/test/results/clientpositive/union_remove_9.q.out +++ b/ql/src/test/results/clientpositive/union_remove_9.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. -- There is no need for the outer union. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. -- There is no need for the outer union. @@ -268,11 +270,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_script.q.out b/ql/src/test/results/clientpositive/union_script.q.out index e044f63..44ea01b 100644 --- a/ql/src/test/results/clientpositive/union_script.q.out +++ b/ql/src/test/results/clientpositive/union_script.q.out @@ -1,10 +1,12 @@ -PREHOOK: query: select * from ( - select transform(key) using 'cat' as cola from src)s order by cola +PREHOOK: query: -- SORT_QUERY_RESULTS +select * from ( + select transform(key) using 'cat' as cola from src)s PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select * from ( - select transform(key) using 'cat' as cola from src)s order by cola +POSTHOOK: query: -- SORT_QUERY_RESULTS +select * from ( + select transform(key) using 'cat' as cola from src)s POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -511,14 +513,14 @@ POSTHOOK: Input: default@src PREHOOK: query: select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola + select transform(key) using 'cat' as cola from src) s PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola + select transform(key) using 'cat' as cola from src) s POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_type_chk.q.out b/ql/src/test/results/clientpositive/union_type_chk.q.out index 12f060b..1eb0182 100644 --- a/ql/src/test/results/clientpositive/union_type_chk.q.out +++ b/ql/src/test/results/clientpositive/union_type_chk.q.out @@ -1,28 +1,30 @@ -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0.4999999900000002 +0.4999999900000002 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 -0.4999999900000002 -0.4999999900000002 -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL -NULL 0.25 0.25 0.25 0.25 +NULL +NULL diff --git a/ql/src/test/results/clientpositive/union_view.q.out b/ql/src/test/results/clientpositive/union_view.q.out index 2c30f0d..985ef9d 100644 --- a/ql/src/test/results/clientpositive/union_view.q.out +++ b/ql/src/test/results/clientpositive/union_view.q.out @@ -535,14 +535,16 @@ STAGE PLANS: Union Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src_union_2 filterExpr: ((key = 86) and ds is not null) (type: boolean) @@ -557,14 +559,16 @@ STAGE PLANS: Union Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src_union_3 filterExpr: ((key = 86) and ds is not null) (type: boolean) @@ -579,26 +583,16 @@ STAGE PLANS: Union Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: 86 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Map Reduce