From 60fd864c56da118f99f6ef68f1c13171aa11851d Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Fri, 19 Aug 2016 16:13:49 -0700 Subject: [PATCH] HIVE-14590 : CBO (Calcite Return Path) Incorrect result set when limit is present in one of the union branches --- .../optimizer/calcite/translator/HiveOpConverter.java | 18 ++++++++++++------ ql/src/test/queries/clientpositive/union_null.q | 3 +++ ql/src/test/results/clientpositive/union_null.q.out | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index 8d56595..0ead9be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -148,9 +149,19 @@ private OpAttr clone(Operator... inputs) { } } + private void handleTopLimit(Operator rootOp) { + if (rootOp instanceof LimitOperator) { + // this can happen only on top most limit, not while visiting Limit Operator + // since that can be within subquery. + this.semanticAnalyzer.getQB().getParseInfo().setOuterQueryLimit(((LimitOperator) rootOp).getConf().getLimit()); + } + } + public Operator convert(RelNode root) throws SemanticException { OpAttr opAf = dispatch(root); - return opAf.inputs.get(0); + Operator rootOp = opAf.inputs.get(0); + handleTopLimit(rootOp); + return rootOp; } OpAttr dispatch(RelNode rn) throws SemanticException { @@ -492,11 +503,6 @@ OpAttr visit(HiveSortLimit sortRel) throws SemanticException { int limit = RexLiteral.intValue(sortRel.fetch); int offset = sortRel.offset == null ? 0 : RexLiteral.intValue(sortRel.offset); LimitDesc limitDesc = new LimitDesc(offset,limit); - // Because we are visiting the operators recursively, the last limit op that - // calls the following function will set the global property. - if (this.semanticAnalyzer != null && semanticAnalyzer.getQB() != null - && semanticAnalyzer.getQB().getParseInfo() != null) - this.semanticAnalyzer.getQB().getParseInfo().setOuterQueryLimit(limit); ArrayList cinfoLst = createColInfos(resultOp); resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(cinfoLst), resultOp); diff --git a/ql/src/test/queries/clientpositive/union_null.q b/ql/src/test/queries/clientpositive/union_null.q index a17325c..23da07a 100644 --- a/ql/src/test/queries/clientpositive/union_null.q +++ b/ql/src/test/queries/clientpositive/union_null.q @@ -2,6 +2,9 @@ -- HIVE-2901 select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a; +set hive.cbo.returnpath.hiveop=true; +select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a; +set hive.cbo.returnpath.hiveop=false; -- HIVE-4837 select * from (select * from (select null as N from src1 group by key)a UNION ALL select * from (select null as N from src1 group by key)b ) a; diff --git a/ql/src/test/results/clientpositive/union_null.q.out b/ql/src/test/results/clientpositive/union_null.q.out index 32cdf65..a3407d4 100644 --- a/ql/src/test/results/clientpositive/union_null.q.out +++ b/ql/src/test/results/clientpositive/union_null.q.out @@ -22,6 +22,24 @@ NULL NULL NULL NULL +PREHOOK: query: select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL +val_0 +val_0 +val_0 +val_10 +val_100 PREHOOK: query: -- HIVE-4837 select * from (select * from (select null as N from src1 group by key)a UNION ALL select * from (select null as N from src1 group by key)b ) a PREHOOK: type: QUERY -- 1.7.12.4 (Apple Git-37)