From 89305f2b7be7bb0b1be8450dab30587212ffaf67 Mon Sep 17 00:00:00 2001 From: Gopal V Date: Wed, 14 Feb 2018 11:48:05 -0800 Subject: [PATCH] HIVE-18713.1 --- .../calcite/translator/RexNodeConverter.java | 6 + .../queries/clientpositive/cbo_rp_simple_select.q | 19 +- .../clientpositive/cbo_rp_simple_select.q.out | 234 +++++++++++++++++++++ 3 files changed, 257 insertions(+), 2 deletions(-) diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 39ff591..36a4f62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -336,6 +336,12 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) { // If it is a floor operator, we need to rewrite it childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst); + } else if (calciteOp.getKind() == SqlKind.IN && childRexNodeLst.size() == 2) { + // if it is a single item in an IN clause, transform A IN (B) to A = B + // from IN [A,B] => EQUALS [A,B] + calciteOp = + SqlFunctionConverter.getCalciteOperator("=", FunctionRegistry.getFunctionInfo("=") + .getGenericUDF(), argTypeBldr.build(), retType); } expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); } else { diff --git ql/src/test/queries/clientpositive/cbo_rp_simple_select.q ql/src/test/queries/clientpositive/cbo_rp_simple_select.q index 6be2b19..deb0f8f 100644 --- ql/src/test/queries/clientpositive/cbo_rp_simple_select.q +++ ql/src/test/queries/clientpositive/cbo_rp_simple_select.q @@ -45,8 +45,6 @@ select * from (select cbo_t2.key as x, c_int as c_int, (((c_int+c_float)*10)+5) select cbo_t1.c_int+c_float as x , c_int as c_int, (((c_int+c_float)*10)+5) as y from (select * from cbo_t1 where cbo_t1.c_int >= 0) as cbo_t1 where cbo_t1.c_int >= 0; select cbo_t2.c_int+c_float as x , c_int as c_int, (((c_int+c_float)*10)+5) as y from (select * from cbo_t1 where cbo_t1.c_int >= 0) as cbo_t2 where cbo_t2.c_int >= 0; - - -- 13. null expr in select list select null from cbo_t3; @@ -55,3 +53,20 @@ select key from cbo_t1 where c_int = -6 or c_int = +6; -- 15. query referencing only partition columns select count(cbo_t1.dt) from cbo_t1 join cbo_t2 on cbo_t1.dt = cbo_t2.dt where cbo_t1.dt = '2014' ; + + +-- IN expression rewrite + +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int); -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int); -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int); -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL); -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int); -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0); -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int); -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int); -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int); -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL); -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int); -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0); -- no rewrite diff --git ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out index 2e06e61..469fbaa 100644 --- ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out +++ ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out @@ -741,3 +741,237 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### 400 +PREHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = null) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +POSTHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 -- 2.4.0