diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index dccb598..35641b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -38,6 +38,8 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Hive; @@ -53,6 +55,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; @@ -138,6 +141,13 @@ public static boolean onlyContainsPartnCols(Table tab, ExprNodeDesc expr) { */ public static PrunedPartitionList prune(TableScanOperator ts, ParseContext parseCtx, String alias) throws SemanticException { + if ("strict".equalsIgnoreCase(HiveConf.getVar(parseCtx.getConf(), HiveConf.ConfVars.HIVEMAPREDMODE)) + && !isPartitionsFiltered(ts)) { + // In "strict" mode a partitioned table must be queried with a predicate on one of its partition keys.
+ throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE + .getMsg("for Alias \"" + alias + "\" Table \"" + ts.getConf().getTableMetadata().getTableName() + "\"")); + } + return prune(ts.getConf().getTableMetadata(), parseCtx.getOpToPartPruner().get(ts), parseCtx.getConf(), alias, parseCtx.getPrunedPartitions()); } @@ -178,13 +188,6 @@ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap); } - if ("strict".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE)) - && !hasColumnExpr(prunerExpr)) { - // If the "strict" mode is on, we have to provide partition pruner for each table. - throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE - .getMsg("for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\"")); - } - if (prunerExpr == null) { // In non-strict mode and there is no predicates at all - get everything. return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap); @@ -532,22 +535,74 @@ public static boolean prunePartitionNames(List partColumnNames, } /** - * Whether the expression contains a column node or not. + * Tell whether the scanned table is unpartitioned or is filtered on a partition key. + * @param ts table scan operator to check; it is dereferenced unconditionally, so it must not be null + * @return true if the table is not partitioned, or a filter on or below the scan references a partition key + */ + private static boolean isPartitionsFiltered(TableScanOperator ts) { + Table table = ts.getConf().getTableMetadata(); + return !table.isPartitioned() || hasColumnExpr(table, ts); + } + + /** + * Check if the operator or its children contains a partition key to + * filter the table.
+ * @param op root of the operator subtree to inspect; may be null + * @return true if the scan filter or a filter predicate in that subtree references a partition key + */ + private static boolean hasColumnExpr(Table table, Operator<? extends OperatorDesc> op) { + if (op == null) { + return false; + } + + if (op instanceof TableScanOperator) { + TableScanOperator ts = (TableScanOperator)op; + if (hasColumnExpr(table, ts.getConf().getFilterExpr())) { + return true; + } + } + + if (op instanceof FilterOperator) { + FilterOperator fop = (FilterOperator)op; + + if (hasColumnExpr(table, fop.getConf().getPredicate())) { + return true; + } + } + + for (Operator<? extends OperatorDesc> childOP : op.getChildOperators()) { + if (hasColumnExpr(table, childOP)) { + return true; + } + } + + return false; + } + + /** + * Check if the expression references a partition key of the table; always true for an unpartitioned table. */ - public static boolean hasColumnExpr(ExprNodeDesc desc) { + private static boolean hasColumnExpr(Table table, ExprNodeDesc desc) { + if (!table.isPartitioned()) { + return true; + } + // Return false for null if (desc == null) { return false; } - // Return true for exprNodeColumnDesc - if (desc instanceof ExprNodeColumnDesc) { + + // Return true if the column is a partition key + if (desc instanceof ExprNodeColumnDesc + && table.isPartitionKey(((ExprNodeColumnDesc) desc).getColumn())) { return true; } + // Return true in case one of the children is column expr.
List<ExprNodeDesc> children = desc.getChildren(); if (children != null) { for (int i = 0; i < children.size(); i++) { - if (hasColumnExpr(children.get(i))) { + if (hasColumnExpr(table, children.get(i))) { return true; } } diff --git a/ql/src/test/queries/clientpositive/query_partitioned_table.q b/ql/src/test/queries/clientpositive/query_partitioned_table.q new file mode 100644 index 0000000..eebcdec --- /dev/null +++ b/ql/src/test/queries/clientpositive/query_partitioned_table.q @@ -0,0 +1,11 @@ +-- Test the query against partitioned table with strict mode + +create table if not exists query_partitioned(key string) partitioned by (ds string); + +insert overwrite table query_partitioned partition (ds="2010-03-03") +select '11' from src limit 1; + +set hive.mapred.mode=strict; +select * from query_partitioned where ds < to_date(date_add(from_unixtime( unix_timestamp() ),1)); + + diff --git a/ql/src/test/results/clientpositive/query_partitioned_table.q.out b/ql/src/test/results/clientpositive/query_partitioned_table.q.out new file mode 100644 index 0000000..b12826e --- /dev/null +++ b/ql/src/test/results/clientpositive/query_partitioned_table.q.out @@ -0,0 +1,34 @@ +PREHOOK: query: -- Test the query against partitioned table with strict mode + +create table if not exists query_partitioned(key string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@query_partitioned +POSTHOOK: query: -- Test the query against partitioned table with strict mode + +create table if not exists query_partitioned(key string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@query_partitioned +PREHOOK: query: insert overwrite table query_partitioned partition (ds="2010-03-03") +select '11' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@query_partitioned@ds=2010-03-03 +POSTHOOK: query: insert overwrite table query_partitioned
partition (ds="2010-03-03") +select '11' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@query_partitioned@ds=2010-03-03 +POSTHOOK: Lineage: query_partitioned PARTITION(ds=2010-03-03).key SIMPLE [] +PREHOOK: query: select * from query_partitioned where ds < to_date(date_add(from_unixtime( unix_timestamp() ),1)) +PREHOOK: type: QUERY +PREHOOK: Input: default@query_partitioned +PREHOOK: Input: default@query_partitioned@ds=2010-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select * from query_partitioned where ds < to_date(date_add(from_unixtime( unix_timestamp() ),1)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@query_partitioned +POSTHOOK: Input: default@query_partitioned@ds=2010-03-03 +#### A masked pattern was here #### +11 2010-03-03