diff --git a/ql/src/test/queries/clientpositive/union23.q b/ql/src/test/queries/clientpositive/union23.q index 052f34a..0574ceb 100644 --- a/ql/src/test/queries/clientpositive/union23.q +++ b/ql/src/test/queries/clientpositive/union23.q @@ -1,18 +1,17 @@ set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; diff --git a/ql/src/test/queries/clientpositive/union32.q b/ql/src/test/queries/clientpositive/union32.q index f47f0af..b18d484 100644 --- a/ql/src/test/queries/clientpositive/union32.q +++ b/ql/src/test/queries/clientpositive/union32.q @@ -12,8 +12,7 @@ EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL -SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key; +SELECT CAST(key AS BIGINT) AS key FROM t2) a; SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 diff --git a/ql/src/test/queries/clientpositive/union34.q b/ql/src/test/queries/clientpositive/union34.q index 2ab16de..dbefc40 100644 --- a/ql/src/test/queries/clientpositive/union34.q +++ b/ql/src/test/queries/clientpositive/union34.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS create table src10_1 (key string, value string); create table src10_2 (key string, value string); create table src10_3 (key string, value string); @@ -18,13 +19,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; set hive.auto.convert.join=false; -- When we do not convert the Join of sub1 and sub0 into a MapJoin, @@ -36,10 +37,10 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; diff --git a/ql/src/test/queries/clientpositive/union36.q b/ql/src/test/queries/clientpositive/union36.q index c38e7b1..b79ff0f 100644 --- a/ql/src/test/queries/clientpositive/union36.q +++ b/ql/src/test/queries/clientpositive/union36.q @@ -1,9 +1,10 @@ set hive.mapred.mode=nonstrict; set hive.cbo.enable=false; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y; +-- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y; +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u; diff --git a/ql/src/test/queries/clientpositive/unionDistinct_1.q b/ql/src/test/queries/clientpositive/unionDistinct_1.q index 0d53a96..5c52d9b 100644 --- a/ql/src/test/queries/clientpositive/unionDistinct_1.q +++ b/ql/src/test/queries/clientpositive/unionDistinct_1.q @@ -386,16 +386,14 @@ from ( select transform(key, value) using 'cat' as (key2, value2) from src UNION DISTINCT - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src UNION DISTINCT - select key as key2, value as value2 from src) s -order by s.key2, s.value2; + select key as key2, value as value2 from src) s; -- union24.q @@ -874,8 +872,7 @@ EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT -SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key; +SELECT CAST(key AS BIGINT) AS key FROM t2) a; SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 @@ -1013,13 +1010,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; set hive.auto.convert.join=false; -- When we do not convert the Join of sub1 and sub0 into a MapJoin, @@ -1031,13 +1028,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key; +) alias1; -- union4.q diff --git a/ql/src/test/queries/clientpositive/union_remove_1.q b/ql/src/test/queries/clientpositive/union_remove_1.q index 702f71a..0a69068 100644 --- a/ql/src/test/queries/clientpositive/union_remove_1.q +++ b/ql/src/test/queries/clientpositive/union_remove_1.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -42,4 +43,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_10.q b/ql/src/test/queries/clientpositive/union_remove_10.q index 508f354..7892e15 100644 --- a/ql/src/test/queries/clientpositive/union_remove_10.q +++ b/ql/src/test/queries/clientpositive/union_remove_10.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce @@ -55,4 +56,4 @@ select * FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_11.q b/ql/src/test/queries/clientpositive/union_remove_11.q index bdfbf66..fd41648 100644 --- a/ql/src/test/queries/clientpositive/union_remove_11.q +++ b/ql/src/test/queries/clientpositive/union_remove_11.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), @@ -55,4 +56,4 @@ select * FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_12.q b/ql/src/test/queries/clientpositive/union_remove_12.q index f6436f5..b665666 100644 --- a/ql/src/test/queries/clientpositive/union_remove_12.q +++ b/ql/src/test/queries/clientpositive/union_remove_12.q @@ -9,6 +9,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_13.q b/ql/src/test/queries/clientpositive/union_remove_13.q index b02451b..11077fd 100644 --- a/ql/src/test/queries/clientpositive/union_remove_13.q +++ b/ql/src/test/queries/clientpositive/union_remove_13.q @@ -9,6 +9,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_14.q b/ql/src/test/queries/clientpositive/union_remove_14.q index bec6226..b559b35 100644 --- a/ql/src/test/queries/clientpositive/union_remove_14.q +++ b/ql/src/test/queries/clientpositive/union_remove_14.q @@ -9,6 +9,7 @@ set hive.auto.convert.join=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), @@ -50,4 +51,4 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_15.q b/ql/src/test/queries/clientpositive/union_remove_15.q index e384739..43c7834 100644 --- a/ql/src/test/queries/clientpositive/union_remove_15.q +++ b/ql/src/test/queries/clientpositive/union_remove_15.q @@ -10,6 +10,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. @@ -50,5 +51,5 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '1' order by key, `values`; -select * from outputTbl1 where ds = '2' order by key, `values`; +select * from outputTbl1 where ds = '1'; +select * from outputTbl1 where ds = '2'; diff --git a/ql/src/test/queries/clientpositive/union_remove_16.q b/ql/src/test/queries/clientpositive/union_remove_16.q index 41305bf..eca8aeb 100644 --- a/ql/src/test/queries/clientpositive/union_remove_16.q +++ b/ql/src/test/queries/clientpositive/union_remove_16.q @@ -11,6 +11,7 @@ set mapred.input.dir.recursive=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. @@ -49,5 +50,5 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '1' order by key, `values`; -select * from outputTbl1 where ds = '2' order by key, `values`; +select * from outputTbl1 where ds = '1'; +select * from outputTbl1 where ds = '2'; diff --git a/ql/src/test/queries/clientpositive/union_remove_17.q b/ql/src/test/queries/clientpositive/union_remove_17.q index 8f91d03..59a3a9c 100644 --- a/ql/src/test/queries/clientpositive/union_remove_17.q +++ b/ql/src/test/queries/clientpositive/union_remove_17.q @@ -10,6 +10,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. @@ -46,5 +47,5 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '1' order by key, `values`; -select * from outputTbl1 where ds = '2' order by key, `values`; +select * from outputTbl1 where ds = '1'; +select * from outputTbl1 where ds = '2'; diff --git a/ql/src/test/queries/clientpositive/union_remove_18.q b/ql/src/test/queries/clientpositive/union_remove_18.q index ad38742..98ee7d0 100644 --- a/ql/src/test/queries/clientpositive/union_remove_18.q +++ b/ql/src/test/queries/clientpositive/union_remove_18.q @@ -10,6 +10,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -49,6 +50,6 @@ desc formatted outputTbl1; show partitions outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 where ds = '11' order by key, `values`; -select * from outputTbl1 where ds = '18' order by key, `values`; -select * from outputTbl1 where ds is not null order by key, `values`, ds; +select * from outputTbl1 where ds = '11'; +select * from outputTbl1 where ds = '18'; +select * from outputTbl1 where ds is not null; diff --git a/ql/src/test/queries/clientpositive/union_remove_19.q b/ql/src/test/queries/clientpositive/union_remove_19.q index 34f8b5a..9e47254 100644 --- a/ql/src/test/queries/clientpositive/union_remove_19.q +++ b/ql/src/test/queries/clientpositive/union_remove_19.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -90,4 +91,4 @@ FROM ( ) b where b.key >= 7; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_2.q b/ql/src/test/queries/clientpositive/union_remove_2.q index 274608c..2ebc1ad 100644 --- a/ql/src/test/queries/clientpositive/union_remove_2.q +++ b/ql/src/test/queries/clientpositive/union_remove_2.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. @@ -47,5 +48,5 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_20.q b/ql/src/test/queries/clientpositive/union_remove_20.q index 93a49f3..700ee4d 100644 --- a/ql/src/test/queries/clientpositive/union_remove_20.q +++ b/ql/src/test/queries/clientpositive/union_remove_20.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot @@ -43,4 +44,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_21.q b/ql/src/test/queries/clientpositive/union_remove_21.q index fa8ff27..1b1472e 100644 --- a/ql/src/test/queries/clientpositive/union_remove_21.q +++ b/ql/src/test/queries/clientpositive/union_remove_21.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot @@ -43,4 +44,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_22.q b/ql/src/test/queries/clientpositive/union_remove_22.q index 0e97cae..d4d3cbc 100644 --- a/ql/src/test/queries/clientpositive/union_remove_22.q +++ b/ql/src/test/queries/clientpositive/union_remove_22.q @@ -63,4 +63,4 @@ FROM ( ) a; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_23.q b/ql/src/test/queries/clientpositive/union_remove_23.q index 436719d..98ae503 100644 --- a/ql/src/test/queries/clientpositive/union_remove_23.q +++ b/ql/src/test/queries/clientpositive/union_remove_23.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -45,4 +46,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_24.q b/ql/src/test/queries/clientpositive/union_remove_24.q index 3a1e225..7ed80d1 100644 --- a/ql/src/test/queries/clientpositive/union_remove_24.q +++ b/ql/src/test/queries/clientpositive/union_remove_24.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -41,4 +42,4 @@ SELECT * FROM desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_25.q b/ql/src/test/queries/clientpositive/union_remove_25.q index d70adb9..78fa9aa 100644 --- a/ql/src/test/queries/clientpositive/union_remove_25.q +++ b/ql/src/test/queries/clientpositive/union_remove_25.q @@ -8,6 +8,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -45,7 +46,7 @@ FROM ( desc formatted outputTbl1 partition(ds='2004'); set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; explain insert overwrite table outputTbl2 partition(ds) diff --git a/ql/src/test/queries/clientpositive/union_remove_26.q b/ql/src/test/queries/clientpositive/union_remove_26.q index d35d4e2..4e77d73 100644 --- a/ql/src/test/queries/clientpositive/union_remove_26.q +++ b/ql/src/test/queries/clientpositive/union_remove_26.q @@ -108,4 +108,4 @@ select count(*) from ( UNION ALL SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 UNION ALL - SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t; \ No newline at end of file + SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t; diff --git a/ql/src/test/queries/clientpositive/union_remove_3.q b/ql/src/test/queries/clientpositive/union_remove_3.q index 2e6e3e6..b0f63fc 100644 --- a/ql/src/test/queries/clientpositive/union_remove_3.q +++ b/ql/src/test/queries/clientpositive/union_remove_3.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. @@ -47,5 +48,5 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_4.q b/ql/src/test/queries/clientpositive/union_remove_4.q index a08bcc1..9ee6282 100644 --- a/ql/src/test/queries/clientpositive/union_remove_4.q +++ b/ql/src/test/queries/clientpositive/union_remove_4.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set mapred.input.dir.recursive=true; set hive.merge.smallfiles.avgsize=1; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -43,4 +44,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_5.q b/ql/src/test/queries/clientpositive/union_remove_5.q index 4ac3edf..5d3c482 100644 --- a/ql/src/test/queries/clientpositive/union_remove_5.q +++ b/ql/src/test/queries/clientpositive/union_remove_5.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_6.q b/ql/src/test/queries/clientpositive/union_remove_6.q index f37f409..23eb760 100644 --- a/ql/src/test/queries/clientpositive/union_remove_6.q +++ b/ql/src/test/queries/clientpositive/union_remove_6.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. @@ -38,5 +39,5 @@ insert overwrite table outputTbl1 select * insert overwrite table outputTbl2 select *; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; -select * from outputTbl2 order by key, `values`;; +select * from outputTbl1; +select * from outputTbl2; diff --git a/ql/src/test/queries/clientpositive/union_remove_6_subq.q b/ql/src/test/queries/clientpositive/union_remove_6_subq.q index 6853728..3607a52 100644 --- a/ql/src/test/queries/clientpositive/union_remove_6_subq.q +++ b/ql/src/test/queries/clientpositive/union_remove_6_subq.q @@ -43,8 +43,8 @@ insert overwrite table outputTbl1 select * insert overwrite table outputTbl2 select *; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; -select * from outputTbl2 order by key, `values`; +select * from outputTbl1; +select * from outputTbl2; -- The following queries guarantee the correctness. explain diff --git a/ql/src/test/queries/clientpositive/union_remove_7.q b/ql/src/test/queries/clientpositive/union_remove_7.q index cd6a75a..5e0d14b 100644 --- a/ql/src/test/queries/clientpositive/union_remove_7.q +++ b/ql/src/test/queries/clientpositive/union_remove_7.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them @@ -44,4 +45,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_8.q b/ql/src/test/queries/clientpositive/union_remove_8.q index 93aabe6..1e049ba 100644 --- a/ql/src/test/queries/clientpositive/union_remove_8.q +++ b/ql/src/test/queries/clientpositive/union_remove_8.q @@ -7,6 +7,7 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. @@ -49,4 +50,4 @@ FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_remove_9.q b/ql/src/test/queries/clientpositive/union_remove_9.q index 1bb8d6d..b819560 100644 --- a/ql/src/test/queries/clientpositive/union_remove_9.q +++ b/ql/src/test/queries/clientpositive/union_remove_9.q @@ -8,6 +8,7 @@ set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. @@ -53,4 +54,4 @@ select * FROM ( desc formatted outputTbl1; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select * from outputTbl1 order by key, `values`; +select * from outputTbl1; diff --git a/ql/src/test/queries/clientpositive/union_script.q b/ql/src/test/queries/clientpositive/union_script.q index 7687104..e5b160f 100644 --- a/ql/src/test/queries/clientpositive/union_script.q +++ b/ql/src/test/queries/clientpositive/union_script.q @@ -1,8 +1,9 @@ set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS select * from ( - select transform(key) using 'cat' as cola from src)s order by cola; + select transform(key) using 'cat' as cola from src)s; select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola; + select transform(key) using 'cat' as cola from src) s; diff --git a/ql/src/test/queries/clientpositive/union_type_chk.q b/ql/src/test/queries/clientpositive/union_type_chk.q index a25aeda..ff2e7cf 100644 --- a/ql/src/test/queries/clientpositive/union_type_chk.q +++ b/ql/src/test/queries/clientpositive/union_type_chk.q @@ -1,7 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.cbo.enable=false; -set hive.mapred.mode=nonstrict; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y; +-- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y; +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u; diff --git a/ql/src/test/queries/clientpositive/union_view.q b/ql/src/test/queries/clientpositive/union_view.q index fa39450..89d580c 100644 --- a/ql/src/test/queries/clientpositive/union_view.q +++ b/ql/src/test/queries/clientpositive/union_view.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.stats.dbclass=fs; set hive.explain.user=false; +-- SORT_QUERY_RESULTS CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string); CREATE INDEX src_union_1_key_idx ON TABLE src_union_1(key) AS 'COMPACT' WITH DEFERRED REBUILD; @@ -63,12 +64,12 @@ SELECT key, value, ds FROM src_union_3 EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1'; EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='2'; EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='3'; -EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL order by ds; +EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL; SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1'; SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='2'; SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='3'; -SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL order by ds; +SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds IS NOT NULL; EXPLAIN SELECT count(1) from src_union_view WHERE ds ='1'; EXPLAIN SELECT count(1) from src_union_view WHERE ds ='2'; diff --git a/ql/src/test/results/clientpositive/union23.q.out b/ql/src/test/results/clientpositive/union23.q.out index e40b88c..4435c93 100644 --- a/ql/src/test/results/clientpositive/union23.q.out +++ b/ql/src/test/results/clientpositive/union23.q.out @@ -1,20 +1,20 @@ -PREHOOK: query: explain +PREHOOK: query: -- SORT_QUERY_RESULTS +explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- SORT_QUERY_RESULTS +explain select s.key2, s.value2 from ( select transform(key, value) using 'cat' as (key2, value2) from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -40,10 +40,13 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + File Output Operator + compressed: false Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -53,22 +56,13 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + File Output Operator + compressed: false Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -82,7 +76,6 @@ from ( from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -92,7 +85,6 @@ from ( from src union all select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union32.q.out b/ql/src/test/results/clientpositive/union32.q.out index f61a313..136a1c3 100644 --- a/ql/src/test/results/clientpositive/union32.q.out +++ b/ql/src/test/results/clientpositive/union32.q.out @@ -38,7 +38,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key PREHOOK: type: QUERY POSTHOOK: query: -- Test simple union with double EXPLAIN @@ -46,7 +45,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -65,10 +63,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -78,22 +79,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/union34.q.out b/ql/src/test/results/clientpositive/union34.q.out index 06f75e7..8d7846c 100644 --- a/ql/src/test/results/clientpositive/union34.q.out +++ b/ql/src/test/results/clientpositive/union34.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: create table src10_1 (key string, value string) +PREHOOK: query: -- SORT_QUERY_RESULTS +create table src10_1 (key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@src10_1 -POSTHOOK: query: create table src10_1 (key string, value string) +POSTHOOK: query: -- SORT_QUERY_RESULTS +create table src10_1 (key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@src10_1 @@ -67,7 +69,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, -- we can use a single MR job to evaluate this entire query. @@ -76,7 +78,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-7 is a root stage @@ -119,11 +121,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -133,11 +137,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -158,25 +164,15 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -188,7 +184,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -199,7 +195,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 @@ -245,7 +241,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, -- we need to use two MR jobs to evaluate this query. @@ -256,7 +252,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -320,11 +316,13 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_3 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -334,11 +332,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -348,23 +348,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -376,7 +366,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -387,7 +377,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 diff --git a/ql/src/test/results/clientpositive/union36.q.out b/ql/src/test/results/clientpositive/union36.q.out index 12f060b..1eb0182 100644 --- a/ql/src/test/results/clientpositive/union36.q.out +++ b/ql/src/test/results/clientpositive/union36.q.out @@ -1,28 +1,30 @@ -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0.4999999900000002 +0.4999999900000002 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 -0.4999999900000002 -0.4999999900000002 -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL -NULL 0.25 0.25 0.25 0.25 +NULL +NULL diff --git a/ql/src/test/results/clientpositive/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/unionDistinct_1.q.out index 1d32a06..8c9ce5e 100644 --- a/ql/src/test/results/clientpositive/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/unionDistinct_1.q.out @@ -7795,7 +7795,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY POSTHOOK: query: -- union23.q @@ -7806,12 +7805,10 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -7870,26 +7867,6 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7908,7 +7885,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -7918,7 +7894,6 @@ from ( from src UNION DISTINCT select key as key2, value as value2 from src) s -order by s.key2, s.value2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -13784,7 +13759,6 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key PREHOOK: type: QUERY POSTHOOK: query: -- Test simple union with double EXPLAIN @@ -13792,12 +13766,10 @@ SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION DISTINCT SELECT CAST(key AS BIGINT) AS key FROM t2) a -ORDER BY key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -13849,26 +13821,6 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -14938,7 +14890,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, -- we can use a single MR job to evaluate this entire query. @@ -14947,17 +14899,16 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-8 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 is a root stage + Stage-7 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -15011,7 +14962,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0-subquery1:$hdt$_0-subquery1:$hdt$_0:src10_1 @@ -15090,27 +15041,6 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -15127,7 +15057,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -15138,7 +15068,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 @@ -15164,7 +15094,7 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, -- we need to use two MR jobs to evaluate this query. @@ -15175,14 +15105,13 @@ SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -15272,34 +15201,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -15363,7 +15271,7 @@ PREHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 PREHOOK: type: QUERY PREHOOK: Input: default@src10_1 PREHOOK: Input: default@src10_2 @@ -15374,7 +15282,7 @@ POSTHOOK: query: SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION DISTINCT SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0 -) alias1 order by key +) alias1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src10_1 POSTHOOK: Input: default@src10_2 diff --git a/ql/src/test/results/clientpositive/union_remove_1.q.out b/ql/src/test/results/clientpositive/union_remove_1.q.out index 1bd471d..9b35b5f 100644 --- a/ql/src/test/results/clientpositive/union_remove_1.q.out +++ b/ql/src/test/results/clientpositive/union_remove_1.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -206,11 +208,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_10.q.out b/ql/src/test/results/clientpositive/union_remove_10.q.out index 14645f0..e98a907 100644 --- a/ql/src/test/results/clientpositive/union_remove_10.q.out +++ b/ql/src/test/results/clientpositive/union_remove_10.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce -- job), followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where one of the sub-queries requires a map-reduce -- job), followed by select star and a file sink. @@ -265,11 +267,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_11.q.out b/ql/src/test/results/clientpositive/union_remove_11.q.out index 5696383..9820267 100644 --- a/ql/src/test/results/clientpositive/union_remove_11.q.out +++ b/ql/src/test/results/clientpositive/union_remove_11.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), -- followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a nested union where also contains map only sub-queries), -- followed by select star and a file sink. @@ -254,11 +256,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_12.q.out b/ql/src/test/results/clientpositive/union_remove_12.q.out index 1aa21eb..46b6895 100644 --- a/ql/src/test/results/clientpositive/union_remove_12.q.out +++ b/ql/src/test/results/clientpositive/union_remove_12.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union optimization is applied, and the union is removed. @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union optimization is applied, and the union is removed. @@ -253,11 +255,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_13.q.out b/ql/src/test/results/clientpositive/union_remove_13.q.out index 1bf411f..4639602 100644 --- a/ql/src/test/results/clientpositive/union_remove_13.q.out +++ b/ql/src/test/results/clientpositive/union_remove_13.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union selectstar optimization should be performed, and the union should be removed. @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a mapred query, and the -- other one is a map-join query), followed by select star and a file sink. -- The union selectstar optimization should be performed, and the union should be removed. @@ -276,11 +278,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -292,8 +294,8 @@ POSTHOOK: Input: default@outputtbl1 3 13 7 1 7 17 -8 2 8 18 8 18 +8 2 8 28 8 28 diff --git a/ql/src/test/results/clientpositive/union_remove_14.q.out b/ql/src/test/results/clientpositive/union_remove_14.q.out index 6c89b92..fcb091e 100644 --- a/ql/src/test/results/clientpositive/union_remove_14.q.out +++ b/ql/src/test/results/clientpositive/union_remove_14.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), -- followed by select star and a file sink. @@ -16,7 +17,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which is a map-only query, and the -- other one contains a join, which should be performed as a map-join query at runtime), -- followed by select star and a file sink. @@ -255,11 +257,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_15.q.out b/ql/src/test/results/clientpositive/union_remove_15.q.out index a259df8..43a75af 100644 --- a/ql/src/test/results/clientpositive/union_remove_15.q.out +++ b/ql/src/test/results/clientpositive/union_remove_15.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -236,12 +238,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -251,12 +253,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 3 1 1 7 1 1 8 2 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/union_remove_16.q.out b/ql/src/test/results/clientpositive/union_remove_16.q.out index c7a08f3..174a1a0 100644 --- a/ql/src/test/results/clientpositive/union_remove_16.q.out +++ b/ql/src/test/results/clientpositive/union_remove_16.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -268,12 +270,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -283,12 +285,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 3 1 1 7 1 1 8 2 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/union_remove_17.q.out b/ql/src/test/results/clientpositive/union_remove_17.q.out index 688e365..5cde83e 100644 --- a/ql/src/test/results/clientpositive/union_remove_17.q.out +++ b/ql/src/test/results/clientpositive/union_remove_17.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need for this optimization, since the query is a map-only query. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- and the results are written to a table using dynamic partitions. -- There is no need for this optimization, since the query is a map-only query. @@ -191,12 +193,12 @@ POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@outputtbl1 ds=1 ds=2 -PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=1 @@ -207,12 +209,12 @@ POSTHOOK: Input: default@outputtbl1@ds=1 7 1 1 8 1 1 8 1 1 -PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '2' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2 diff --git a/ql/src/test/results/clientpositive/union_remove_18.q.out b/ql/src/test/results/clientpositive/union_remove_18.q.out index 96daa12..d5b5a17 100644 --- a/ql/src/test/results/clientpositive/union_remove_18.q.out +++ b/ql/src/test/results/clientpositive/union_remove_18.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, ds string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -250,31 +252,31 @@ ds=13 ds=17 ds=18 ds=28 -PREHOOK: query: select * from outputTbl1 where ds = '11' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '11' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=11 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '11' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '11' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=11 #### A masked pattern was here #### 1 1 11 1 1 11 -PREHOOK: query: select * from outputTbl1 where ds = '18' order by key, `values` +PREHOOK: query: select * from outputTbl1 where ds = '18' PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=18 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds = '18' order by key, `values` +POSTHOOK: query: select * from outputTbl1 where ds = '18' POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=18 #### A masked pattern was here #### 8 1 18 8 1 18 -PREHOOK: query: select * from outputTbl1 where ds is not null order by key, `values`, ds +PREHOOK: query: select * from outputTbl1 where ds is not null PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=11 @@ -284,7 +286,7 @@ PREHOOK: Input: default@outputtbl1@ds=17 PREHOOK: Input: default@outputtbl1@ds=18 PREHOOK: Input: default@outputtbl1@ds=28 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 where ds is not null order by key, `values`, ds +POSTHOOK: query: select * from outputTbl1 where ds is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=11 diff --git a/ql/src/test/results/clientpositive/union_remove_19.q.out b/ql/src/test/results/clientpositive/union_remove_19.q.out index 1a40efa..20f7e0b 100644 --- a/ql/src/test/results/clientpositive/union_remove_19.q.out +++ b/ql/src/test/results/clientpositive/union_remove_19.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -538,11 +540,11 @@ POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_2.q.out b/ql/src/test/results/clientpositive/union_remove_2.q.out index e5de3c6..b889be5 100644 --- a/ql/src/test/results/clientpositive/union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/union_remove_2.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -217,11 +219,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_20.q.out b/ql/src/test/results/clientpositive/union_remove_20.q.out index 96d76d4..746cbf3 100644 --- a/ql/src/test/results/clientpositive/union_remove_20.q.out +++ b/ql/src/test/results/clientpositive/union_remove_20.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot -- be removed. @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, the order of the columns in the select list is different. So, union cannot -- be removed. @@ -216,11 +218,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_21.q.out b/ql/src/test/results/clientpositive/union_remove_21.q.out index 1356777..c143cc8 100644 --- a/ql/src/test/results/clientpositive/union_remove_21.q.out +++ b/ql/src/test/results/clientpositive/union_remove_21.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot -- be removed. @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, all the columns are not selected. So, union cannot -- be removed. @@ -200,11 +202,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_22.q.out b/ql/src/test/results/clientpositive/union_remove_22.q.out index 933a0e8..d269270 100644 --- a/ql/src/test/results/clientpositive/union_remove_22.q.out +++ b/ql/src/test/results/clientpositive/union_remove_22.q.out @@ -379,11 +379,11 @@ POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl1.values2 EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_23.q.out b/ql/src/test/results/clientpositive/union_remove_23.q.out index b00e5d1..585cdbb 100644 --- a/ql/src/test/results/clientpositive/union_remove_23.q.out +++ b/ql/src/test/results/clientpositive/union_remove_23.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. One of the sub-queries @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. One of the sub-queries @@ -256,11 +258,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_24.q.out b/ql/src/test/results/clientpositive/union_remove_24.q.out index 95bf66b..8e266fd 100644 --- a/ql/src/test/results/clientpositive/union_remove_24.q.out +++ b/ql/src/test/results/clientpositive/union_remove_24.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -11,7 +12,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -212,11 +214,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_25.q.out b/ql/src/test/results/clientpositive/union_remove_25.q.out index 3869735..2785022 100644 --- a/ql/src/test/results/clientpositive/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/union_remove_25.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -228,12 +230,12 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 PREHOOK: Input: default@outputtbl1@ds=2004 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 POSTHOOK: Input: default@outputtbl1@ds=2004 diff --git a/ql/src/test/results/clientpositive/union_remove_3.q.out b/ql/src/test/results/clientpositive/union_remove_3.q.out index a95a48e..5e117bd 100644 --- a/ql/src/test/results/clientpositive/union_remove_3.q.out +++ b/ql/src/test/results/clientpositive/union_remove_3.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->remove->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. -- There is no need for any optimization, since the whole query can be processed in @@ -13,7 +14,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->remove->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->remove->filesink optimization -- Union of 3 subqueries is performed (all of which are map-only queries) -- followed by select star and a file sink. -- There is no need for any optimization, since the whole query can be processed in @@ -206,11 +208,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_4.q.out b/ql/src/test/results/clientpositive/union_remove_4.q.out index 818ae80..0004ed0 100644 --- a/ql/src/test/results/clientpositive/union_remove_4.q.out +++ b/ql/src/test/results/clientpositive/union_remove_4.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -12,7 +13,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -250,11 +252,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_5.q.out b/ql/src/test/results/clientpositive/union_remove_5.q.out index d6c2b99..f88d1d2 100644 --- a/ql/src/test/results/clientpositive/union_remove_5.q.out +++ b/ql/src/test/results/clientpositive/union_remove_5.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -263,11 +265,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_6.q.out b/ql/src/test/results/clientpositive/union_remove_6.q.out index 1b076c2..0a80e63 100644 --- a/ql/src/test/results/clientpositive/union_remove_6.q.out +++ b/ql/src/test/results/clientpositive/union_remove_6.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -9,7 +10,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -230,11 +232,11 @@ POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(n POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -248,11 +250,11 @@ POSTHOOK: Input: default@outputtbl1 7 1 8 2 8 2 -PREHOOK: query: select * from outputTbl2 order by key, `values` +PREHOOK: query: select * from outputTbl2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl2 order by key, `values` +POSTHOOK: query: select * from outputTbl2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out index b1e79e7..6439a0f 100644 --- a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -242,11 +242,11 @@ POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(n POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, ] -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### @@ -260,11 +260,11 @@ POSTHOOK: Input: default@outputtbl1 7 1 8 2 8 2 -PREHOOK: query: select * from outputTbl2 order by key, `values` +PREHOOK: query: select * from outputTbl2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl2 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl2 order by key, `values` +POSTHOOK: query: select * from outputTbl2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_7.q.out b/ql/src/test/results/clientpositive/union_remove_7.q.out index 5541cb0..0a2dc62 100644 --- a/ql/src/test/results/clientpositive/union_remove_7.q.out +++ b/ql/src/test/results/clientpositive/union_remove_7.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select star and a file sink -- There is no need to write the temporary results of the sub-queries, and then read them -- again to process the union. The union can be removed completely. @@ -210,11 +212,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_8.q.out b/ql/src/test/results/clientpositive/union_remove_8.q.out index 22e6f15..3003b3c 100644 --- a/ql/src/test/results/clientpositive/union_remove_8.q.out +++ b/ql/src/test/results/clientpositive/union_remove_8.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -15,7 +16,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job) -- followed by select star and a file sink. -- There is no need to write the temporary results of the sub-queries, and then read them @@ -221,11 +223,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_remove_9.q.out b/ql/src/test/results/clientpositive/union_remove_9.q.out index 70b5ee9..deb2a34 100644 --- a/ql/src/test/results/clientpositive/union_remove_9.q.out +++ b/ql/src/test/results/clientpositive/union_remove_9.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. -- There is no need for the outer union. @@ -14,7 +15,8 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (one of which contains a union and is map-only), -- and the other one is a map-reduce query followed by select star and a file sink. -- There is no need for the outer union. @@ -268,11 +270,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: select * from outputTbl1 order by key, `values` +PREHOOK: query: select * from outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -POSTHOOK: query: select * from outputTbl1 order by key, `values` +POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_script.q.out b/ql/src/test/results/clientpositive/union_script.q.out index e044f63..44ea01b 100644 --- a/ql/src/test/results/clientpositive/union_script.q.out +++ b/ql/src/test/results/clientpositive/union_script.q.out @@ -1,10 +1,12 @@ -PREHOOK: query: select * from ( - select transform(key) using 'cat' as cola from src)s order by cola +PREHOOK: query: -- SORT_QUERY_RESULTS +select * from ( + select transform(key) using 'cat' as cola from src)s PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select * from ( - select transform(key) using 'cat' as cola from src)s order by cola +POSTHOOK: query: -- SORT_QUERY_RESULTS +select * from ( + select transform(key) using 'cat' as cola from src)s POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -511,14 +513,14 @@ POSTHOOK: Input: default@src PREHOOK: query: select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola + select transform(key) using 'cat' as cola from src) s PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: select * from ( select transform(key) using 'cat' as cola from src union all - select transform(key) using 'cat' as cola from src) s order by cola + select transform(key) using 'cat' as cola from src) s POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/union_type_chk.q.out b/ql/src/test/results/clientpositive/union_type_chk.q.out index 12f060b..1eb0182 100644 --- a/ql/src/test/results/clientpositive/union_type_chk.q.out +++ b/ql/src/test/results/clientpositive/union_type_chk.q.out @@ -1,28 +1,30 @@ -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: -- SORT_QUERY_RESULTS +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0.4999999900000002 +0.4999999900000002 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 4.999999900000002E-9 -0.4999999900000002 -0.4999999900000002 -PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL -NULL 0.25 0.25 0.25 0.25 +NULL +NULL diff --git a/ql/src/test/results/clientpositive/union_view.q.out b/ql/src/test/results/clientpositive/union_view.q.out index 6409d25..d145eb3 100644 --- a/ql/src/test/results/clientpositive/union_view.q.out +++ b/ql/src/test/results/clientpositive/union_view.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) +PREHOOK: query: -- SORT_QUERY_RESULTS +CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@src_union_1 -POSTHOOK: query: CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) +POSTHOOK: query: -- SORT_QUERY_RESULTS +CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@src_union_1 @@ -706,14 +708,16 @@ STAGE PLANS: Union Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src_union_2 filterExpr: ((key = 86) and ds is not null) (type: boolean) @@ -728,14 +732,16 @@ STAGE PLANS: Union Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: src_union_3 filterExpr: ((key = 86) and ds is not null) (type: boolean) @@ -750,26 +756,16 @@ STAGE PLANS: Union Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + + File Output Operator + compressed: false Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: 86 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Map Reduce