Index: ql/src/test/results/clientpositive/skewjoinopt8.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt8.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt8.q.out (working copy) @@ -36,12 +36,16 @@ PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than -- 2 tables. The join key is the same, and so a 3-way join would be performed. -- 1 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key PREHOOK: type: QUERY POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than -- 2 tables. The join key is the same, and so a 3-way join would be performed. -- 1 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY @@ -285,12 +289,14 @@ PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt3.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt3.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt3.q.out (working copy) @@ -27,6 +27,7 @@ PREHOOK: query: -- a simple query with skew on both the tables. One of the skewed -- value is common to both the tables. The skewed value should not be -- repeated in the filter. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -34,6 +35,7 @@ POSTHOOK: query: -- a simple query with skew on both the tables. One of the skewed -- value is common to both the tables. The skewed value should not be -- repeated in the filter. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -224,11 +226,13 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -435,21 +439,23 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -1 11 NULL NULL NULL NULL 4 14 NULL NULL 5 15 -7 17 NULL NULL +1 11 NULL NULL 2 12 2 22 3 13 3 13 +7 17 NULL NULL 8 18 8 18 8 18 8 18 8 28 8 18 Index: ql/src/test/results/clientpositive/skewjoinopt15.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt15.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt15.q.out (working copy) @@ -64,6 +64,8 @@ -- Otherwise this test is similar to skewjoinopt1.q -- Both the joined tables are skewed, and the joined column -- is an integer +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY @@ -71,6 +73,8 @@ -- Otherwise this test is similar to skewjoinopt1.q -- Both the joined tables are skewed, and the joined column -- is an integer +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY @@ -264,11 +268,13 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -277,12 +283,12 @@ POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] +2 12 2 22 +3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 -3 13 3 13 PREHOOK: query: -- test outer joins also EXPLAIN @@ -483,11 +489,13 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -498,12 +506,12 @@ POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] NULL NULL 4 14 NULL NULL 5 15 +2 12 2 22 +3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 -3 13 3 13 PREHOOK: query: -- an aggregation at the end should not change anything EXPLAIN Index: ql/src/test/results/clientpositive/skewjoinopt10.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt10.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt10.q.out (working copy) @@ -29,10 +29,14 @@ POSTHOOK: Lineage: array_valued_t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: array_valued_t1.value EXPRESSION [(t1)t1.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed by a lateral view +-- adding a order by at the end to make the results deterministic + explain select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val PREHOOK: type: QUERY POSTHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed by a lateral view +-- adding a order by at the end to make the results deterministic + explain select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val POSTHOOK: type: QUERY @@ -284,11 +288,13 @@ PREHOOK: query: select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +ORDER BY key, val PREHOOK: type: QUERY PREHOOK: Input: default@array_valued_t1 PREHOOK: Input: default@t1 #### A masked pattern was here #### POSTHOOK: query: select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +ORDER BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@array_valued_t1 POSTHOOK: Input: default@t1 @@ -300,6 +306,6 @@ 3 ["13"] 13 7 ["17"] 17 8 ["18"] 18 -8 ["28"] 28 8 ["18"] 18 8 ["28"] 28 +8 ["28"] 28 Index: ql/src/test/results/clientpositive/skewjoinopt5.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt5.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt5.q.out (working copy) @@ -26,12 +26,14 @@ POSTHOOK: Output: default@t2 PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the first skewed column +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the first skewed column +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -222,18 +224,20 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +2 12 2 22 +3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 -3 13 3 13 Index: ql/src/test/results/clientpositive/skewjoinopt17.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt17.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt17.q.out (working copy) @@ -28,6 +28,7 @@ -- skewed by one column. Ths join is performed on the first skewed column -- The skewed value for the jon key is common to both the tables. -- In this case, the skewed join value is not repeated in the filter. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -36,6 +37,7 @@ -- skewed by one column. Ths join is performed on the first skewed column -- The skewed value for the jon key is common to both the tables. -- In this case, the skewed join value is not repeated in the filter. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -226,21 +228,23 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +2 12 2 22 3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 PREHOOK: query: DROP TABLE T1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@t1 @@ -498,11 +502,13 @@ limit: -1 PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt12.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt12.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt12.q.out (working copy) @@ -26,12 +26,14 @@ POSTHOOK: Output: default@t2 PREHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values -- is common to both the tables. The join key matches the skewed key set. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val PREHOOK: type: QUERY POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values -- is common to both the tables. The join key matches the skewed key set. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val @@ -237,11 +239,13 @@ limit: -1 PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt7.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt7.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt7.q.out (working copy) @@ -38,12 +38,16 @@ PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than -- 2 tables. The join key is the same, and so a 3-way join would be performed. -- 2 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key PREHOOK: type: QUERY POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than -- 2 tables. The join key is the same, and so a 3-way join would be performed. -- 2 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY @@ -287,12 +291,14 @@ PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt19.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt19.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt19.q.out (working copy) @@ -27,12 +27,16 @@ PREHOOK: query: -- add a test where the skewed key is also the bucketized key -- it should not matter, and the compile time skewed join -- optimization is performed +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- add a test where the skewed key is also the bucketized key -- it should not matter, and the compile time skewed join -- optimization is performed +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY @@ -222,18 +226,20 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +2 12 2 22 3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 Index: ql/src/test/results/clientpositive/skewjoinopt2.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt2.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt2.q.out (working copy) @@ -28,6 +28,7 @@ -- multiple skew values are present for the skewed keys -- but the skewed values do not overlap. -- The join values are a superset of the skewed keys. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val @@ -36,6 +37,7 @@ -- multiple skew values are present for the skewed keys -- but the skewed values do not overlap. -- The join values are a superset of the skewed keys. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val @@ -241,11 +243,13 @@ limit: -1 PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -464,11 +468,13 @@ limit: -1 PREHOOK: query: SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -477,9 +483,9 @@ 2 12 NULL NULL 3 13 3 13 7 17 NULL NULL +8 28 NULL NULL 8 18 8 18 8 18 8 18 -8 28 NULL NULL PREHOOK: query: -- a group by at the end should not change anything EXPLAIN Index: ql/src/test/results/clientpositive/skewjoinopt14.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt14.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt14.q.out (working copy) @@ -41,6 +41,8 @@ -- 3 consist of a sub-query which contains a join, the compile time skew join -- optimization is not enabled for table 3, but it is used for the first join between -- tables 1 and 2 +-- adding a order by at the end to make the results deterministic + EXPLAIN select * from @@ -53,6 +55,8 @@ -- 3 consist of a sub-query which contains a join, the compile time skew join -- optimization is not enabled for table 3, but it is used for the first join between -- tables 1 and 2 +-- adding a order by at the end to make the results deterministic + EXPLAIN select * from @@ -297,6 +301,7 @@ from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val +order by a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 @@ -306,6 +311,7 @@ from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val +order by a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt9.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt9.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt9.q.out (working copy) @@ -24,6 +24,7 @@ POSTHOOK: Output: default@t2 PREHOOK: query: -- no skew join compile time optimization would be performed if one of the -- join sources is a sub-query consisting of a union all +-- adding a order by at the end to make the results deterministic EXPLAIN select * from ( @@ -35,6 +36,7 @@ PREHOOK: type: QUERY POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the -- join sources is a sub-query consisting of a union all +-- adding a order by at the end to make the results deterministic EXPLAIN select * from ( @@ -161,6 +163,7 @@ select key, val from T1 ) subq1 join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 @@ -172,6 +175,7 @@ select key, val from T1 ) subq1 join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -343,6 +347,7 @@ select key, count(1) as cnt from T1 group by key ) subq1 join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.cnt, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 @@ -352,6 +357,7 @@ select key, count(1) as cnt from T1 group by key ) subq1 join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.cnt, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt4.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt4.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt4.q.out (working copy) @@ -24,12 +24,14 @@ POSTHOOK: Output: default@t2 PREHOOK: query: -- only of the tables of the join (the left table of the join) is skewed -- the skewed filter would still be applied to both the tables +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- only of the tables of the join (the left table of the join) is skewed -- the skewed filter would still be applied to both the tables +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -220,21 +222,23 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +2 12 2 22 3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 PREHOOK: query: -- the order of the join should not matter, just confirming EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key @@ -429,18 +433,20 @@ PREHOOK: query: SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +2 22 2 12 3 13 3 13 8 18 8 18 -8 18 8 28 8 18 8 18 8 18 8 28 -2 22 2 12 +8 18 8 28 Index: ql/src/test/results/clientpositive/skewjoinopt16.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt16.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt16.q.out (working copy) @@ -26,12 +26,14 @@ POSTHOOK: Output: default@t2 PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the both the columns +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val PREHOOK: type: QUERY POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the both the columns +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val @@ -237,15 +239,17 @@ limit: -1 PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +3 13 3 13 8 18 8 18 8 18 8 18 -3 13 3 13 Index: ql/src/test/results/clientpositive/skewjoinopt11.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt11.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt11.q.out (working copy) @@ -25,6 +25,8 @@ PREHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed -- by a union. Both sides of a union consist of a join, which should have used -- skew join compile time optimization. +-- adding a order by at the end to make the results deterministic + EXPLAIN select * from ( @@ -36,6 +38,8 @@ POSTHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed -- by a union. Both sides of a union consist of a join, which should have used -- skew join compile time optimization. +-- adding a order by at the end to make the results deterministic + EXPLAIN select * from ( @@ -433,6 +437,7 @@ union all select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key ) subq1 +ORDER BY key, val1, val2 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 @@ -443,19 +448,20 @@ union all select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key ) subq1 +ORDER BY key, val1, val2 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -3 13 13 -8 18 18 -8 18 18 -8 28 18 -8 28 18 2 12 22 2 12 22 3 13 13 +3 13 13 8 18 18 8 18 18 +8 18 18 +8 18 18 8 28 18 8 28 18 +8 28 18 +8 28 18 Index: ql/src/test/results/clientpositive/skewjoinopt20.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt20.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt20.q.out (working copy) @@ -27,12 +27,16 @@ PREHOOK: query: -- add a test where the skewed key is also the bucketized/sorted key -- it should not matter, and the compile time skewed join -- optimization is performed +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- add a test where the skewed key is also the bucketized/sorted key -- it should not matter, and the compile time skewed join -- optimization is performed +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY @@ -222,18 +226,20 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +2 12 2 22 3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 Index: ql/src/test/results/clientpositive/skewjoinopt6.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt6.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt6.q.out (working copy) @@ -27,6 +27,7 @@ PREHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values -- is common to both the tables. The join key is a subset of the skewed key set: -- it only contains the first skewed key for both the tables +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -34,6 +35,7 @@ POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values -- is common to both the tables. The join key is a subset of the skewed key set: -- it only contains the first skewed key for both the tables +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -224,11 +226,13 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt18.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt18.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt18.q.out (working copy) @@ -52,12 +52,16 @@ PREHOOK: query: -- Once HIVE-3445 is fixed, the compile time skew join optimization would be -- applicable here. Till the above jira is fixed, it would be performed as a -- regular join +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- Once HIVE-3445 is fixed, the compile time skew join optimization would be -- applicable here. Till the above jira is fixed, it would be performed as a -- regular join +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY @@ -141,11 +145,13 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/results/clientpositive/skewjoinopt1.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt1.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt1.q.out (working copy) @@ -25,11 +25,13 @@ POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 PREHOOK: query: -- a simple join query with skew on both the tables on the join key +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: -- a simple join query with skew on both the tables on the join key +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key @@ -220,21 +222,23 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### +2 12 2 22 +3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 -3 13 3 13 PREHOOK: query: -- test outer joins also EXPLAIN @@ -431,23 +435,25 @@ PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### NULL NULL 4 14 NULL NULL 5 15 +2 12 2 22 +3 13 3 13 8 18 8 18 8 18 8 18 8 28 8 18 8 28 8 18 -2 12 2 22 -3 13 3 13 PREHOOK: query: -- an aggregation at the end should not change anything EXPLAIN Index: ql/src/test/results/clientpositive/skewjoinopt13.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt13.q.out (revision 1389705) +++ ql/src/test/results/clientpositive/skewjoinopt13.q.out (working copy) @@ -38,6 +38,7 @@ -- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table -- 3 consist of a sub-query which contains a join, the compile time skew join -- optimization is not performed +-- adding a order by at the end to make the results deterministic EXPLAIN select * @@ -50,6 +51,7 @@ -- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table -- 3 consist of a sub-query which contains a join, the compile time skew join -- optimization is not performed +-- adding a order by at the end to make the results deterministic EXPLAIN select * @@ -198,6 +200,7 @@ from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val +order by a.key, b.key, c.key, a.val, b.val, c.val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 @@ -207,6 +210,7 @@ from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val +order by a.key, b.key, c.key, a.val, b.val, c.val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 Index: ql/src/test/queries/clientpositive/skewjoinopt1.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt1.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt1.q (working copy) @@ -12,18 +12,21 @@ LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2; -- a simple join query with skew on both the tables on the join key +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; -- an aggregation at the end should not change anything Index: ql/src/test/queries/clientpositive/skewjoinopt19.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt19.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt19.q (working copy) @@ -14,7 +14,10 @@ -- add a test where the skewed key is also the bucketized key -- it should not matter, and the compile time skewed join -- optimization is performed +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt3.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt3.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt3.q (working copy) @@ -14,15 +14,18 @@ -- a simple query with skew on both the tables. One of the skewed -- value is common to both the tables. The skewed value should not be -- repeated in the filter. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt5.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt5.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt5.q (working copy) @@ -13,8 +13,10 @@ -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the first skewed column +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt7.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt7.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt7.q (working copy) @@ -18,7 +18,10 @@ -- This test is for validating skewed join compile time optimization for more than -- 2 tables. The join key is the same, and so a 3-way join would be performed. -- 2 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key; -SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key; +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val; Index: ql/src/test/queries/clientpositive/skewjoinopt10.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt10.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt10.q (working copy) @@ -10,8 +10,10 @@ insert overwrite table array_valued_T1 select key, array(value) from T1; -- This test is to verify the skew join compile optimization when the join is followed by a lateral view +-- adding a order by at the end to make the results deterministic + explain select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val; -select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val; - +select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +ORDER BY key, val; Index: ql/src/test/queries/clientpositive/skewjoinopt9.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt9.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt9.q (working copy) @@ -12,6 +12,7 @@ -- no skew join compile time optimization would be performed if one of the -- join sources is a sub-query consisting of a union all +-- adding a order by at the end to make the results deterministic EXPLAIN select * from ( @@ -27,7 +28,8 @@ union all select key, val from T1 ) subq1 -join T2 b on subq1.key = b.key; +join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.val, b.val; -- no skew join compile time optimization would be performed if one of the -- join sources is a sub-query consisting of a group by @@ -42,4 +44,5 @@ ( select key, count(1) as cnt from T1 group by key ) subq1 -join T2 b on subq1.key = b.key; +join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.cnt, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt12.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt12.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt12.q (working copy) @@ -13,8 +13,10 @@ -- Both the join tables are skewed by 2 keys, and one of the skewed values -- is common to both the tables. The join key matches the skewed key set. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt14.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt14.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt14.q (working copy) @@ -21,6 +21,8 @@ -- 3 consist of a sub-query which contains a join, the compile time skew join -- optimization is not enabled for table 3, but it is used for the first join between -- tables 1 and 2 +-- adding a order by at the end to make the results deterministic + EXPLAIN select * from @@ -30,5 +32,6 @@ select * from T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val; +join T3 c on a.val = c.val +order by a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt16.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt16.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt16.q (working copy) @@ -13,8 +13,10 @@ -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the both the columns +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt18.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt18.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt18.q (working copy) @@ -20,7 +20,10 @@ -- Once HIVE-3445 is fixed, the compile time skew join optimization would be -- applicable here. Till the above jira is fixed, it would be performed as a -- regular join +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt2.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt2.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt2.q (working copy) @@ -15,18 +15,21 @@ -- multiple skew values are present for the skewed keys -- but the skewed values do not overlap. -- The join values are a superset of the skewed keys. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val; -- a group by at the end should not change anything Index: ql/src/test/queries/clientpositive/skewjoinopt4.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt4.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt4.q (working copy) @@ -12,14 +12,17 @@ -- only of the tables of the join (the left table of the join) is skewed -- the skewed filter would still be applied to both the tables +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; -- the order of the join should not matter, just confirming EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key; -SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key; +SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt6.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt6.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt6.q (working copy) @@ -14,8 +14,10 @@ -- Both the join tables are skewed by 2 keys, and one of the skewed values -- is common to both the tables. The join key is a subset of the skewed key set: -- it only contains the first skewed key for both the tables +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt8.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt8.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt8.q (working copy) @@ -17,7 +17,10 @@ -- This test is for validating skewed join compile time optimization for more than -- 2 tables. The join key is the same, and so a 3-way join would be performed. -- 1 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key; -SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key; +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val; Index: ql/src/test/queries/clientpositive/skewjoinopt11.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt11.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt11.q (working copy) @@ -13,6 +13,8 @@ -- This test is to verify the skew join compile optimization when the join is followed -- by a union. Both sides of a union consist of a join, which should have used -- skew join compile time optimization. +-- adding a order by at the end to make the results deterministic + EXPLAIN select * from ( @@ -26,4 +28,5 @@ select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key union all select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key -) subq1; +) subq1 +ORDER BY key, val1, val2; Index: ql/src/test/queries/clientpositive/skewjoinopt20.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt20.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt20.q (working copy) @@ -14,7 +14,10 @@ -- add a test where the skewed key is also the bucketized/sorted key -- it should not matter, and the compile time skewed join -- optimization is performed +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; Index: ql/src/test/queries/clientpositive/skewjoinopt13.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt13.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt13.q (working copy) @@ -19,6 +19,7 @@ -- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table -- 3 consist of a sub-query which contains a join, the compile time skew join -- optimization is not performed +-- adding a order by at the end to make the results deterministic EXPLAIN select * @@ -29,5 +30,6 @@ select * from T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val; +join T3 c on a.val = c.val +order by a.key, b.key, c.key, a.val, b.val, c.val; Index: ql/src/test/queries/clientpositive/skewjoinopt15.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt15.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt15.q (working copy) @@ -21,17 +21,21 @@ -- Otherwise this test is similar to skewjoinopt1.q -- Both the joined tables are skewed, and the joined column -- is an integer +-- adding a order by at the end to make the results deterministic + EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; -- an aggregation at the end should not change anything Index: ql/src/test/queries/clientpositive/skewjoinopt17.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt17.q (revision 1389705) +++ ql/src/test/queries/clientpositive/skewjoinopt17.q (working copy) @@ -15,11 +15,13 @@ -- skewed by one column. Ths join is performed on the first skewed column -- The skewed value for the jon key is common to both the tables. -- In this case, the skewed join value is not repeated in the filter. +-- adding a order by at the end to make the results deterministic EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val; DROP TABLE T1; DROP TABLE T2; @@ -42,4 +44,5 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val; \ No newline at end of file