diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 6df3d47..2881882 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -571,6 +571,7 @@ spark.query.files=add_part_multiple.q, \ bucketsortoptimize_insert_6.q, \ bucketsortoptimize_insert_7.q, \ bucketsortoptimize_insert_8.q, \ + cbo_gby_empty.q, \ column_access_stats.q, \ count.q, \ create_merge_compressed.q, \ diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 2466d78..10ec7be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8802,11 +8802,21 @@ private Operator genUnionPlan(String unionalias, String leftalias, unionoutRR.put(unionalias, field, unionColInfo); } - if (!(leftOp instanceof UnionOperator)) { + // For Spark, we rely on the generated SelectOperator to do the type casting. + // Consider: + // SEL_1 (int) SEL_2 (int) SEL_3 (double) + // If we first merge SEL_1 and SEL_2 into a UNION_1, and then merge UNION_1 + // with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence an error + // will occur later. The solution here is to insert one after UNION_1, which + // casts int to double. 
+ boolean isSpark = HiveConf.getVar(conf, + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"); + + if (isSpark || !(leftOp instanceof UnionOperator)) { leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias); } - if (!(rightOp instanceof UnionOperator)) { + if (isSpark || !(rightOp instanceof UnionOperator)) { rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias); } diff --git ql/src/test/results/clientpositive/spark/cbo_gby_empty.q.out ql/src/test/results/clientpositive/spark/cbo_gby_empty.q.out new file mode 100644 index 0000000..68f0255 --- /dev/null +++ ql/src/test/results/clientpositive/spark/cbo_gby_empty.q.out @@ -0,0 +1,77 @@ +PREHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr +select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr +select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +tst1 +PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +tst1 500 +PREHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc order by unionsrc.key +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked 
pattern was here #### +POSTHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc order by unionsrc.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +avg +max +min +PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc order by unionsrc.key +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc order by unionsrc.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +avg 1.5 +max 3.0 +min 1.0 +PREHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc group by unionsrc.key order by unionsrc.key +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc group by unionsrc.key order by unionsrc.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +avg 1 +max 1 +min 1