diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index e43d39f..1796b7b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -56,7 +56,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** @@ -188,12 +190,18 @@ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, // Replace virtual columns with nulls. See javadoc for details. prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter); // Remove all parts that are not partition columns. See javadoc for details. - ExprNodeGenericFuncDesc compactExpr = (ExprNodeGenericFuncDesc)compactExpr(prunerExpr.clone()); + ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone()); String oldFilter = prunerExpr.getExprString(); - if (compactExpr == null) { - // Non-strict mode, and all the predicates are on non-partition columns - get everything. - LOG.debug("Filter " + oldFilter + " was null after compacting"); - return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap); + if (isBooleanExpr(compactExpr)) { + // For null and true values, return every partition + if (!isFalseExpr(compactExpr)) { + // Non-strict mode, and all the predicates are on non-partition columns - get everything. + LOG.debug("Filter " + oldFilter + " was null after compacting"); + return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap); + } else { + return new PrunedPartitionList(tab, new LinkedHashSet(new ArrayList()), + new ArrayList(), false); + } } LOG.debug("Filter w/ compacting: " + compactExpr.getExprString() + "; filter w/o compacting: " + oldFilter); @@ -204,7 +212,7 @@ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, return ppList; } - ppList = getPartitionsFromServer(tab, compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString())); + ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc)compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString())); prunedPartitionsMap.put(key, ppList); return ppList; } @@ -225,16 +233,22 @@ private static PrunedPartitionList getAllPartsFromCacheOrServer(Table tab, Strin partsCache.put(key, ppList); return ppList; } - - private static ExprNodeDesc removeTruePredciates(ExprNodeDesc e) { - if (e instanceof ExprNodeConstantDesc) { - ExprNodeConstantDesc eC = (ExprNodeConstantDesc) e; - if (e.getTypeInfo() == TypeInfoFactory.booleanTypeInfo - && eC.getValue() == Boolean.TRUE) { - return null; - } - } - return e; + + static private boolean isBooleanExpr(ExprNodeDesc expr) { + return expr != null && expr instanceof ExprNodeConstantDesc && + ((ExprNodeConstantDesc)expr).getTypeInfo() instanceof PrimitiveTypeInfo && + ((PrimitiveTypeInfo)(((ExprNodeConstantDesc)expr).getTypeInfo())). + getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME); + } + static private boolean isTrueExpr(ExprNodeDesc expr) { + return isBooleanExpr(expr) && + ((ExprNodeConstantDesc)expr).getValue() != null && + ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.TRUE); + } + static private boolean isFalseExpr(ExprNodeDesc expr) { + return isBooleanExpr(expr) && + ((ExprNodeConstantDesc)expr).getValue() != null && + ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.FALSE); } /** @@ -245,10 +259,13 @@ private static ExprNodeDesc removeTruePredciates(ExprNodeDesc e) { * @return partition pruning expression that only contains partition columns. */ static private ExprNodeDesc compactExpr(ExprNodeDesc expr) { - if (expr instanceof ExprNodeConstantDesc) { - expr = removeTruePredciates(expr); - if (expr == null || ((ExprNodeConstantDesc)expr).getValue() == null) { - return null; + // If this is a constant boolean expression, return the value. + if (expr == null) { + return null; + } + if (expr instanceof ExprNodeConstantDesc) { + if (isBooleanExpr(expr)) { + return expr; } else { throw new IllegalStateException("Unexpected non-null ExprNodeConstantDesc: " + expr.getExprString()); @@ -256,22 +273,29 @@ static private ExprNodeDesc compactExpr(ExprNodeDesc expr) { } else if (expr instanceof ExprNodeGenericFuncDesc) { GenericUDF udf = ((ExprNodeGenericFuncDesc)expr).getGenericUDF(); boolean isAnd = udf instanceof GenericUDFOPAnd; - if (isAnd || udf instanceof GenericUDFOPOr) { + boolean isOr = udf instanceof GenericUDFOPOr; + + if (isAnd || isOr) { List children = expr.getChildren(); - ExprNodeDesc left = removeTruePredciates(children.get(0)); - children.set(0, left == null ? null : compactExpr(left)); - ExprNodeDesc right = removeTruePredciates(children.get(1)); - children.set(1, right == null ? null : compactExpr(right)); - - // Note that one does not simply compact (not-null or null) to not-null. - // Only if we have an "and" is it valid to send one side to metastore. - if (children.get(0) == null && children.get(1) == null) { - return null; - } else if (children.get(0) == null) { - return isAnd ? children.get(1) : null; - } else if (children.get(1) == null) { - return isAnd ? children.get(0) : null; - } + ExprNodeDesc left = children.get(0); + children.set(0, compactExpr(left)); + ExprNodeDesc right = children.get(1); + children.set(1, compactExpr(right)); + + if (isTrueExpr(children.get(0)) && isTrueExpr(children.get(1))) { + return new ExprNodeConstantDesc(Boolean.TRUE); + } else if (isTrueExpr(children.get(0))) { + return isAnd ? children.get(1) : new ExprNodeConstantDesc(Boolean.TRUE); + } else if (isTrueExpr(children.get(1))) { + return isAnd ? children.get(0) : new ExprNodeConstantDesc(Boolean.TRUE); + } else if (isFalseExpr(children.get(0)) && isFalseExpr(children.get(1))) { + return new ExprNodeConstantDesc(Boolean.FALSE); + } else if (isFalseExpr(children.get(0))) { + return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(1); + } else if (isFalseExpr(children.get(1))) { + return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(0); + } + } return expr; } else { @@ -296,9 +320,9 @@ static private ExprNodeDesc removeNonPartCols(ExprNodeDesc expr, List pa if (!partCols.contains(column)) { // Column doesn't appear to be a partition column for the table. return new ExprNodeConstantDesc(expr.getTypeInfo(), null); - } + } referred.add(column); - } + } if (expr instanceof ExprNodeGenericFuncDesc) { List children = expr.getChildren(); for (int i = 0; i < children.size(); ++i) { diff --git ql/src/test/queries/clientpositive/partition_boolexpr.q ql/src/test/queries/clientpositive/partition_boolexpr.q index e69de29..3394d7d 100644 --- ql/src/test/queries/clientpositive/partition_boolexpr.q +++ ql/src/test/queries/clientpositive/partition_boolexpr.q @@ -0,0 +1,12 @@ +-- create testing table. +create table part_boolexpr(key int, value string) partitioned by (dt int, ts string); + +-- both the below queries should return 0 rows +select count(*) from part_boolexpr where key = 'abc'; +select * from part_boolexpr where dt = 'abc'; +explain select count(1) from srcpart where true; +explain select count(1) from srcpart where false; +explain select count(1) from srcpart where true and hr='11'; +explain select count(1) from srcpart where true or hr='11'; +explain select count(1) from srcpart where false or hr='11'; +explain select count(1) from srcpart where false and hr='11'; \ No newline at end of file diff --git ql/src/test/results/clientpositive/partition_boolexpr.q.out ql/src/test/results/clientpositive/partition_boolexpr.q.out index e69de29..7d414ff 100644 --- ql/src/test/results/clientpositive/partition_boolexpr.q.out +++ ql/src/test/results/clientpositive/partition_boolexpr.q.out @@ -0,0 +1,299 @@ +PREHOOK: query: -- create testing table. +create table part_boolexpr(key int, value string) partitioned by (dt int, ts string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_boolexpr +POSTHOOK: query: -- create testing table. +create table part_boolexpr(key int, value string) partitioned by (dt int, ts string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_boolexpr +PREHOOK: query: -- both the below queries should return 0 rows +select count(*) from part_boolexpr where key = 'abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@part_boolexpr +#### A masked pattern was here #### +POSTHOOK: query: -- both the below queries should return 0 rows +select count(*) from part_boolexpr where key = 'abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_boolexpr +#### A masked pattern was here #### +0 +PREHOOK: query: select * from part_boolexpr where dt = 'abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@part_boolexpr +#### A masked pattern was here #### +POSTHOOK: query: select * from part_boolexpr where dt = 'abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_boolexpr +#### A masked pattern was here #### +PREHOOK: query: explain select count(1) from srcpart where true +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from srcpart where true +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(1) from srcpart where false +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from srcpart where false +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(1) from srcpart where true and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from srcpart where true and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(1) from srcpart where true or hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from srcpart where true or hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(1) from srcpart where false or hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from srcpart where false or hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(1) from srcpart where false and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from srcpart where false and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +