diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 9ca194a..e035201 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -64,9 +64,11 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.serde.serdeConstants; @@ -372,17 +374,30 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List if (udf instanceof GenericUDFOPAnd) { for (int i = 0; i < 2; i++) { ExprNodeDesc childExpr = newExprs.get(i); + ExprNodeDesc other = newExprs.get(Math.abs(i - 1)); if (childExpr instanceof ExprNodeConstantDesc) { ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr; if (Boolean.TRUE.equals(c.getValue())) { // if true, prune it - return newExprs.get(Math.abs(i - 1)); + return other; } else { // if false return false return childExpr; } + } else if (childExpr instanceof ExprNodeGenericFuncDesc && + ((ExprNodeGenericFuncDesc)childExpr).getGenericUDF() instanceof GenericUDFOPNotNull && + childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc && other instanceof ExprNodeGenericFuncDesc + && ((ExprNodeGenericFuncDesc)other).getGenericUDF() instanceof GenericUDFBaseCompare + && other.getChildren().size() == 2) { + ExprNodeColumnDesc colDesc = getColumnExpr(other.getChildren().get(0)); + if (null == colDesc) { + colDesc = getColumnExpr(other.getChildren().get(1)); + } + if (null != colDesc && colDesc.isSame(childExpr.getChildren().get(0))) { + return other; + } } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1b7a41d..511103b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2662,7 +2662,10 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, if (joinKeys == null || joinKeys.length == 0) { return input; } - + Map hashes = new HashMap(); + if (input instanceof FilterOperator) { + ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc)input.getConf()).getPredicate()), hashes); + } ExprNodeDesc filterPred = null; List nullSafes = joinTree.getNullSafes(); for (int i = 0; i < joinKeys.length; i++) { @@ -2672,6 +2675,10 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, // virtual column, since those columns can never be null. continue; } + if(null != hashes.get(joinKeys[i].hashCode())) { + // there is already a predicate on this src. + continue; + } List args = new ArrayList(); args.add(joinKeys[i]); ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance( diff --git a/ql/src/test/results/clientpositive/constprog2.q.out b/ql/src/test/results/clientpositive/constprog2.q.out index 50ff890..71a44f3 100644 --- a/ql/src/test/results/clientpositive/constprog2.q.out +++ b/ql/src/test/results/clientpositive/constprog2.q.out @@ -18,23 +18,23 @@ STAGE PLANS: alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 86) and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: '86' (type: string) sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 86) and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: '86' (type: string) sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -43,14 +43,14 @@ STAGE PLANS: 0 1 {VALUE._col0} outputColumnNames: _col6 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '86' (type: string), 87.0 (type: double), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -93,23 +93,23 @@ STAGE PLANS: alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) = 86) and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: '86' (type: string) sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) = 86) and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: '86' (type: string) sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -118,14 +118,14 @@ STAGE PLANS: 0 1 {VALUE._col0} outputColumnNames: _col6 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '86' (type: string), 87.0 (type: double), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat