diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1ca113c..7ee1190 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8991,21 +8991,21 @@ private Operator genUnionPlan(String unionalias, String leftalias, unionoutRR.put(unionalias, field, unionColInfo); } - // For Spark, we rely on the generated SelectOperator to do the type casting. + // For Spark and Tez (i.e. any non-MR engine), we rely on the generated SelectOperator to do the type casting. // Consider: // SEL_1 (int) SEL_2 (int) SEL_3 (double) // If we first merge SEL_1 and SEL_2 into a UNION_1, and then merge UNION_1 // with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence error // will happen afterwards. The solution here is to insert one after UNION_1, which // cast int to double. - boolean isSpark = HiveConf.getVar(conf, - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"); + boolean isMR = HiveConf.getVar(conf, + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr"); - if (isSpark || !(leftOp instanceof UnionOperator)) { + if (!isMR || !(leftOp instanceof UnionOperator)) { leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias); } - if (isSpark || !(rightOp instanceof UnionOperator)) { + if (!isMR || !(rightOp instanceof UnionOperator)) { rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias); } @@ -9158,9 +9158,17 @@ private Operator genUnionPlan(String unionalias, String leftalias, columnExprMap.put(name, columns.get(i)); } - Operator newInputOp = OperatorFactory.getAndMakeChild( - new SelectDesc(columns, colName), new RowSchema(rowResolver.getColumnInfos()), - columnExprMap, origInputOp); + Operator newInputOp = null; + if (origInputOp instanceof SelectOperator && origInputOp.getParentOperators().size() == 1) { + newInputOp =
OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema( + rowResolver.getColumnInfos()), columnExprMap, origInputOp.getParentOperators().get(0)); + origInputOp.getParentOperators().get(0).removeChild(origInputOp); + origInputOp.removeParents(); + } else { + newInputOp = OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema( + rowResolver.getColumnInfos()), columnExprMap, origInputOp); + } + return putOpInsertMap(newInputOp, rowResolver); } diff --git a/ql/src/test/queries/clientpositive/union_hd.q b/ql/src/test/queries/clientpositive/union_hd.q new file mode 100644 index 0000000..0c8282a --- /dev/null +++ b/ql/src/test/queries/clientpositive/union_hd.q @@ -0,0 +1,6 @@ +set hive.cbo.enable=false; +set hive.execution.engine=mr; + +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y; + +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y; diff --git a/ql/src/test/results/clientpositive/tez/union_hd.q.out b/ql/src/test/results/clientpositive/tez/union_hd.q.out new file mode 100644 index 0000000..12f060b --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/union_hd.q.out @@ -0,0 +1,28 @@ +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select 
(x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4.999999900000002E-9 +4.999999900000002E-9 +4.999999900000002E-9 +4.999999900000002E-9 +0.4999999900000002 +0.4999999900000002 +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +NULL +0.25 +0.25 +0.25 +0.25 diff --git a/ql/src/test/results/clientpositive/union_hd.q.out b/ql/src/test/results/clientpositive/union_hd.q.out new file mode 100644 index 0000000..12f060b --- /dev/null +++ b/ql/src/test/results/clientpositive/union_hd.q.out @@ -0,0 +1,28 @@ +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from 
(select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4.999999900000002E-9 +4.999999900000002E-9 +4.999999900000002E-9 +4.999999900000002E-9 +0.4999999900000002 +0.4999999900000002 +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +NULL +0.25 +0.25 +0.25 +0.25 diff --git a/ql/src/test/results/clientpositive/union_hd.q.out.bak b/ql/src/test/results/clientpositive/union_hd.q.out.bak new file mode 100644 index 0000000..12f060b --- /dev/null +++ b/ql/src/test/results/clientpositive/union_hd.q.out.bak @@ -0,0 +1,28 @@ +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as 
decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4.999999900000002E-9 +4.999999900000002E-9 +4.999999900000002E-9 +4.999999900000002E-9 +0.4999999900000002 +0.4999999900000002 +PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +NULL +0.25 +0.25 +0.25 +0.25