diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 39ff591..7a482d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -257,6 +257,7 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { func.getChildren().size() != 0; boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween; boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn; + boolean isAllPrimitive = true; if (isNumeric) { tgtDT = func.getTypeInfo(); @@ -312,6 +313,8 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { } } + isAllPrimitive = + isAllPrimitive && tmpExprNode.getTypeInfo().getCategory() == Category.PRIMITIVE; argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory())); tmpRN = convert(tmpExprNode); childRexNodeLst.add(tmpRN); @@ -336,6 +339,13 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) { // If it is a floor operator, we need to rewrite it childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst); + } else if (calciteOp.getKind() == SqlKind.IN && childRexNodeLst.size() == 2 && isAllPrimitive) { + // if it is a single item in an IN clause, transform A IN (B) to A = B + // from IN [A,B] => EQUALS [A,B] + // except complex types + calciteOp = + SqlFunctionConverter.getCalciteOperator("=", FunctionRegistry.getFunctionInfo("=") + .getGenericUDF(), argTypeBldr.build(), retType); } expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); } else { diff --git ql/src/test/queries/clientpositive/cbo_rp_simple_select.q ql/src/test/queries/clientpositive/cbo_rp_simple_select.q index 6be2b19..deb0f8f 100644 --- ql/src/test/queries/clientpositive/cbo_rp_simple_select.q +++ ql/src/test/queries/clientpositive/cbo_rp_simple_select.q @@ -45,8 +45,6 @@ select * from (select cbo_t2.key as x, c_int as c_int, (((c_int+c_float)*10)+5) select cbo_t1.c_int+c_float as x , c_int as c_int, (((c_int+c_float)*10)+5) as y from (select * from cbo_t1 where cbo_t1.c_int >= 0) as cbo_t1 where cbo_t1.c_int >= 0; select cbo_t2.c_int+c_float as x , c_int as c_int, (((c_int+c_float)*10)+5) as y from (select * from cbo_t1 where cbo_t1.c_int >= 0) as cbo_t2 where cbo_t2.c_int >= 0; - - -- 13. null expr in select list select null from cbo_t3; @@ -55,3 +53,20 @@ select key from cbo_t1 where c_int = -6 or c_int = +6; -- 15. query referencing only partition columns select count(cbo_t1.dt) from cbo_t1 join cbo_t2 on cbo_t1.dt = cbo_t2.dt where cbo_t1.dt = '2014' ; + + +-- IN expression rewrite + +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int); -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int); -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int); -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL); -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int); -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0); -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int); -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int); -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int); -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL); -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int); -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0); -- no rewrite diff --git ql/src/test/queries/clientpositive/cbo_simple_select.q ql/src/test/queries/clientpositive/cbo_simple_select.q index 50e57fd..2d95233 100644 --- ql/src/test/queries/clientpositive/cbo_simple_select.q +++ ql/src/test/queries/clientpositive/cbo_simple_select.q @@ -54,3 +54,21 @@ select key from cbo_t1 where c_int = -6 or c_int = +6; -- 15. query referencing only partition columns select count(cbo_t1.dt) from cbo_t1 join cbo_t2 on cbo_t1.dt = cbo_t2.dt where cbo_t1.dt = '2014' ; + + +-- IN expression rewrite + +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int); -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int); -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int); -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL); -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int); -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0); -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int); -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int); -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int); -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL); -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int); -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0); -- no rewrite + diff --git ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out index 2e06e61..469fbaa 100644 --- ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out +++ ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out @@ -741,3 +741,237 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### 400 +PREHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = null) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +POSTHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 diff --git ql/src/test/results/clientpositive/cbo_simple_select.q.out ql/src/test/results/clientpositive/cbo_simple_select.q.out index 2e06e61..a44388c 100644 --- ql/src/test/results/clientpositive/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/cbo_simple_select.q.out @@ -741,3 +741,237 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### 400 +PREHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = null) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +POSTHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 diff --git ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out index 2e06e61..f964cb6 100644 --- ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out @@ -741,3 +741,219 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### 400 +PREHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +POSTHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 diff --git ql/src/test/results/clientpositive/perf/spark/query33.q.out ql/src/test/results/clientpositive/perf/spark/query33.q.out index dcf2fef..f2a4eb5 100644 --- ql/src/test/results/clientpositive/perf/spark/query33.q.out +++ ql/src/test/results/clientpositive/perf/spark/query33.q.out @@ -232,7 +232,7 @@ STAGE PLANS: alias: item Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((i_category) IN ('Books') and i_manufact_id is not null) (type: boolean) + predicate: ((i_category = 'Books') and i_manufact_id is not null) (type: boolean) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_manufact_id (type: int) diff --git ql/src/test/results/clientpositive/perf/spark/query60.q.out ql/src/test/results/clientpositive/perf/spark/query60.q.out index 6f0c9cd..ec69e75 100644 --- ql/src/test/results/clientpositive/perf/spark/query60.q.out +++ ql/src/test/results/clientpositive/perf/spark/query60.q.out @@ -238,7 +238,7 @@ STAGE PLANS: alias: item Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((i_category) IN ('Children') and i_item_id is not null) (type: boolean) + predicate: ((i_category = 'Children') and i_item_id is not null) (type: boolean) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_id (type: string) diff --git ql/src/test/results/clientpositive/perf/spark/query89.q.out ql/src/test/results/clientpositive/perf/spark/query89.q.out index ca3a408..baec217 100644 --- ql/src/test/results/clientpositive/perf/spark/query89.q.out +++ ql/src/test/results/clientpositive/perf/spark/query89.q.out @@ -133,17 +133,17 @@ STAGE PLANS: alias: date_dim Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year) IN (2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_moy (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Reducer 2 Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/perf/tez/query33.q.out ql/src/test/results/clientpositive/perf/tez/query33.q.out index a966865..008c7f8 100644 --- ql/src/test/results/clientpositive/perf/tez/query33.q.out +++ ql/src/test/results/clientpositive/perf/tez/query33.q.out @@ -222,7 +222,7 @@ Stage-0 Select Operator [SEL_5] (rows=231000 width=1436) Output:["i_manufact_id"] Filter Operator [FIL_159] (rows=231000 width=1436) - predicate:((i_category) IN ('Books') and i_manufact_id is not null) + predicate:((i_category = 'Books') and i_manufact_id is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"] <-Reducer 21 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/tez/query60.q.out ql/src/test/results/clientpositive/perf/tez/query60.q.out index 67102b3..42eb8bc 100644 --- ql/src/test/results/clientpositive/perf/tez/query60.q.out +++ ql/src/test/results/clientpositive/perf/tez/query60.q.out @@ -228,7 +228,7 @@ Stage-0 Select Operator [SEL_5] (rows=231000 width=1436) Output:["i_item_id"] Filter Operator [FIL_159] (rows=231000 width=1436) - predicate:((i_category) IN ('Children') and i_item_id is not null) + predicate:((i_category = 'Children') and i_item_id is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"] <-Reducer 21 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/tez/query89.q.out ql/src/test/results/clientpositive/perf/tez/query89.q.out index 7c37909..1beb703 100644 --- ql/src/test/results/clientpositive/perf/tez/query89.q.out +++ ql/src/test/results/clientpositive/perf/tez/query89.q.out @@ -115,10 +115,10 @@ Stage-0 <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_8] (rows=36525 width=1119) + Select Operator [SEL_8] (rows=36524 width=1119) Output:["_col0","_col2"] - Filter Operator [FIL_50] (rows=36525 width=1119) - predicate:((d_year) IN (2000) and d_date_sk is not null) + Filter Operator [FIL_50] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 2 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out index 2e06e61..f964cb6 100644 --- ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out @@ -741,3 +741,219 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### 400 +PREHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is 0 +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +POSTHOOK: query: -- c_int is not null +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int = null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +POSTHOOK: query: -- rewrite to NULL +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +POSTHOOK: query: -- no rewrite +EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cbo_t2 + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink + +PREHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite + +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is 0 +select count(*) from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- c_int is not null +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +0 +PREHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- rewrite to NULL +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 +PREHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: -- no rewrite +select count(*) from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +#### A masked pattern was here #### +18 diff --git ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out index fd1179d..3321b09 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -43,8 +43,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@table_6 POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null @@ -90,8 +88,6 @@ POSTHOOK: Input: default@table_19 POSTHOOK: Input: default@table_6 #### A masked pattern was here #### 418.9 -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT t1.decimal0801_col FROM table_19 t1 @@ -108,11 +104,9 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 , consists of Stage-5, Stage-1 - Stage-5 has a backup stage: Stage-1 - Stage-3 depends on stages: Stage-5 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-1 + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -179,119 +173,88 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 - Conditional Operator - - Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $hdt$_0:t1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-3 - Map Reduce - Map Operator Tree: + $hdt$_0:t1 TableScan alias: t1 Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: decimal0801_col is not null (type: boolean) + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 + HashTable Sink Operator keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 601 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2) IN (_col1) (type: boolean) - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: decimal(8,1)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values - vectorized: false - Local Work: - Map Reduce Local Work + 0 _col1 (type: int) + 1 _col0 (type: int) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: t1 - Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: decimal0801_col is not null (type: boolean) - Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(8,1)), _col1 (type: int) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 601 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2) IN (_col1) (type: boolean) - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: decimal(8,1)) + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:int] + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 633 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 633 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,1)] + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator