diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index d182d80..ea200db 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -69,6 +69,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; @@ -726,8 +727,18 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List } else if(thenVal.equals(elseVal)){ return thenExpr; } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) { - return Boolean.TRUE.equals(thenVal) ? whenExpr : - ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), newExprs.subList(0, 1)); + List children = new ArrayList<>(); + children.add(whenExpr); + children.add(new ExprNodeConstantDesc(false)); + ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(), + children); + if (Boolean.TRUE.equals(thenVal)) { + return func; + } else { + List exprs = new ArrayList<>(); + exprs.add(func); + return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs); + } } else { return null; } @@ -767,8 +778,20 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List } else if(thenVal.equals(elseVal)){ return thenExpr; } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) { - return Boolean.TRUE.equals(thenVal) ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), newExprs.subList(0, 2)) : - ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNotEqual(), newExprs.subList(0, 2)); + ExprNodeGenericFuncDesc equal = ExprNodeGenericFuncDesc.newInstance( + new GenericUDFOPEqual(), newExprs.subList(0, 2)); + List children = new ArrayList<>(); + children.add(equal); + children.add(new ExprNodeConstantDesc(false)); + ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(), + children); + if (Boolean.TRUE.equals(thenVal)) { + return func; + } else { + List exprs = new ArrayList<>(); + exprs.add(func); + return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs); + } } else { return null; } diff --git a/ql/src/test/queries/clientpositive/constantPropWhen.q b/ql/src/test/queries/clientpositive/constantPropWhen.q new file mode 100644 index 0000000..c1d4885 --- /dev/null +++ b/ql/src/test/queries/clientpositive/constantPropWhen.q @@ -0,0 +1,43 @@ +set hive.mapred.mode=nonstrict; + +drop table test_1; + +create table test_1 (id int, id2 int); + +insert into table test_1 values (123, NULL), (NULL, NULL), (NULL, 123), (123, 123); + +explain SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +explain SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +explain SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + +explain SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + + +set hive.cbo.enable=false; + +explain SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +explain SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1; + +explain SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + +explain SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + +SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1; + diff --git a/ql/src/test/results/clientpositive/constantPropWhen.q.out b/ql/src/test/results/clientpositive/constantPropWhen.q.out new file mode 100644 index 0000000..ca73454 --- /dev/null +++ b/ql/src/test/results/clientpositive/constantPropWhen.q.out @@ -0,0 +1,382 @@ +PREHOOK: query: drop table test_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table test_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table test_1 (id int, id2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: create table test_1 (id int, id2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: insert into table test_1 values (123, NULL), (NULL, NULL), (NULL, 123), (123, 123) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test_1 +POSTHOOK: query: insert into table test_1 values (123, NULL), (NULL, NULL), (NULL, 123), (123, 123) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test_1.id2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (not if (id = id2) is null returnsfalse) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +true +true +true +false +PREHOOK: query: explain SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (not if (id = id2) is null returnsfalse) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +true +true +true +false +PREHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: if (id = id2) is null returnsfalse (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +false +false +false +true +PREHOOK: query: explain SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: if (id = id2) is null returnsfalse (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +false +false +false +true +PREHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (not if (id = id2) is null returnsfalse) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +true +true +true +false +PREHOOK: query: explain SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (not if (id = id2) is null returnsfalse) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +true +true +true +false +PREHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: if (id = id2) is null returnsfalse (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +false +false +false +true +PREHOOK: query: explain SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: if (id = id2) is null returnsfalse (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +false +false +false +true