diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 410735c..b96eaa6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -520,12 +520,16 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List // if false return false return childExpr; } - } else // Try to fold (key = 86) and (key is not null) to (key = 86) - if (childExpr instanceof ExprNodeGenericFuncDesc && - ((ExprNodeGenericFuncDesc)childExpr).getGenericUDF() instanceof GenericUDFOPNotNull && - childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc && other instanceof ExprNodeGenericFuncDesc - && ((ExprNodeGenericFuncDesc)other).getGenericUDF() instanceof GenericUDFBaseCompare - && other.getChildren().size() == 2) { + } else if (childExpr instanceof ExprNodeGenericFuncDesc && + ((ExprNodeGenericFuncDesc)childExpr).getGenericUDF() instanceof GenericUDFOPNotNull && + childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc && + other instanceof ExprNodeGenericFuncDesc && + ((ExprNodeGenericFuncDesc)other).getGenericUDF() instanceof GenericUDFBaseCompare && + !(((ExprNodeGenericFuncDesc)other).getGenericUDF() instanceof GenericUDFOPNotEqual) && + other.getChildren().size() == 2) { + // Try to fold (key <op> 86) and (key is not null) to (key <op> 86) + // where <op> can be "=", ">=", "<=", ">", "<". 
+ // Note: (key <> 86) and (key is not null) cannot be folded ExprNodeColumnDesc colDesc = getColumnExpr(other.getChildren().get(0)); if (null == colDesc) { colDesc = getColumnExpr(other.getChildren().get(1)); diff --git a/ql/src/test/queries/clientpositive/folder_predicate.q b/ql/src/test/queries/clientpositive/folder_predicate.q new file mode 100644 index 0000000..7581020 --- /dev/null +++ b/ql/src/test/queries/clientpositive/folder_predicate.q @@ -0,0 +1,28 @@ +drop table if exists predicate_fold_tb; + +create table predicate_fold_tb(value int); +insert into predicate_fold_tb values(NULL), (1), (2), (3), (4), (5); + +explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value = 3; +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value = 3; + +explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value >= 3; +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value >= 3; + +explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <= 3; +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <= 3; + +explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value > 3; +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value > 3; + +explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value < 3; +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value < 3; + +explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <> 3; +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <> 3; diff --git a/ql/src/test/results/clientpositive/folder_predicate.q.out b/ql/src/test/results/clientpositive/folder_predicate.q.out new file mode 100644 index 0000000..bb379d9 --- /dev/null +++ b/ql/src/test/results/clientpositive/folder_predicate.q.out @@ -0,0 +1,312 @@ +PREHOOK: query: drop table if exists predicate_fold_tb +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists predicate_fold_tb +POSTHOOK: type: DROPTABLE 
+PREHOOK: query: create table predicate_fold_tb(value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@predicate_fold_tb +POSTHOOK: query: create table predicate_fold_tb(value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@predicate_fold_tb +PREHOOK: query: insert into predicate_fold_tb values(NULL), (1), (2), (3), (4), (5) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@predicate_fold_tb +POSTHOOK: query: insert into predicate_fold_tb values(NULL), (1), (2), (3), (4), (5) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@predicate_fold_tb +POSTHOOK: Lineage: predicate_fold_tb.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value = 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value = 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value = 3) (type: boolean) + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +3 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value >= 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value >= 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value >= 3) (type: boolean) + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value >= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value 
>= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +3 +4 +5 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <= 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <= 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value <= 3) (type: boolean) + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value > 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value > 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 3) (type: boolean) + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value > 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value > 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +4 +5 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value < 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value < 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value < 3) (type: boolean) + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value < 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value < 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +1 +2 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <> 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <> 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + Statistics: Num rows: 6 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and (value <> 3)) (type: boolean) + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <> 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE value IS NOT NULL AND value <> 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +1 +2 +4 +5