Index: ql/src/test/results/clientnegative/part_pred_no_effect_1.q.out
===================================================================
--- ql/src/test/results/clientnegative/part_pred_no_effect_1.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/part_pred_no_effect_1.q.out	(revision 0)
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Partition predicate has no effect. Please check the query or set to nonstrict mode for Alias "srcpart" Table "srcpart"
Index: ql/src/test/results/clientnegative/part_pred_no_effect_2.q.out
===================================================================
--- ql/src/test/results/clientnegative/part_pred_no_effect_2.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/part_pred_no_effect_2.q.out	(revision 0)
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Partition predicate has no effect. Please check the query or set to nonstrict mode for Alias "srcpart" Table "srcpart"
Index: ql/src/test/results/clientpositive/partition_predicate.q.out
===================================================================
--- ql/src/test/results/clientpositive/partition_predicate.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/partition_predicate.q.out	(revision 0)
@@ -0,0 +1,195 @@
+PREHOOK: query: EXPLAIN
+SELECT key FROM srcpart WHERE ds = '2008-04-08' and (key>20 or key<20)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key FROM srcpart WHERE ds = '2008-04-08' and (key>20 or key<20)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF srcpart)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (or (> (TOK_TABLE_OR_COL key) 20) (< (TOK_TABLE_OR_COL key) 20))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        srcpart 
+          TableScan
+            alias: srcpart
+            Filter Operator
+              predicate:
+                  expr: ((ds = '2008-04-08') and ((key > 20) or (key < 20)))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((ds = '2008-04-08') and ((key > 20) or (key < 20)))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key FROM srcpart WHERE ds = '2008-04-08' and (key>20 or key<20) limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-23_11-13-36_161_1513012572139265445/10000
+POSTHOOK: query: SELECT key FROM srcpart WHERE ds = '2008-04-08' and (key>20 or key<20) limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-23_11-13-36_161_1513012572139265445/10000
+238
+86
+311
+27
+165
+409
+255
+278
+98
+484
+PREHOOK: query: EXPLAIN
+SELECT key FROM srcpart WHERE NOT(ds = '2008-04-08' or key>20 )
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key FROM srcpart WHERE NOT(ds = '2008-04-08' or key>20 )
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF srcpart)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (NOT (or (= (TOK_TABLE_OR_COL ds) '2008-04-08') (> (TOK_TABLE_OR_COL key) 20))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        srcpart 
+          TableScan
+            alias: srcpart
+            Filter Operator
+              predicate:
+                  expr: (not ((ds = '2008-04-08') or (key > 20)))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (not ((ds = '2008-04-08') or (key > 20)))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key FROM srcpart WHERE NOT(ds = '2008-04-08' or key>20 ) limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-23_11-13-45_192_762457972075388621/10000
+POSTHOOK: query: SELECT key FROM srcpart WHERE NOT(ds = '2008-04-08' or key>20 ) limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-23_11-13-45_192_762457972075388621/10000
+15
+17
+0
+20
+4
+12
+8
+0
+0
+15
+PREHOOK: query: EXPLAIN
+SELECT key FROM srcpart WHERE ds != '2008-04-08' and key>20
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key FROM srcpart WHERE ds != '2008-04-08' and key>20
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF srcpart)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (and (!= (TOK_TABLE_OR_COL ds) '2008-04-08') (> (TOK_TABLE_OR_COL key) 20)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        srcpart 
+          TableScan
+            alias: srcpart
+            Filter Operator
+              predicate:
+                  expr: ((ds <> '2008-04-08') and (key > 20))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((ds <> '2008-04-08') and (key > 20))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key FROM srcpart WHERE ds != '2008-04-08' and key>20 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-23_11-13-50_830_9104308579799051387/10000
+POSTHOOK: query: SELECT key FROM srcpart WHERE ds != '2008-04-08' and key>20 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-23_11-13-50_830_9104308579799051387/10000
+238
+86
+311
+27
+165
+409
+255
+278
+98
+484
Index: ql/src/test/queries/clientnegative/part_pred_no_effect_1.q
===================================================================
--- ql/src/test/queries/clientnegative/part_pred_no_effect_1.q	(revision 0)
+++ ql/src/test/queries/clientnegative/part_pred_no_effect_1.q	(revision 0)
@@ -0,0 +1,2 @@
+SET hive.mapred.mode = strict;
+SELECT key FROM srcpart WHERE ds = '2008-04-08' and key>20 or key<20;
\ No newline at end of file
Index: ql/src/test/queries/clientnegative/part_pred_no_effect_2.q
===================================================================
--- ql/src/test/queries/clientnegative/part_pred_no_effect_2.q	(revision 0)
+++ ql/src/test/queries/clientnegative/part_pred_no_effect_2.q	(revision 0)
@@ -0,0 +1,2 @@
+SET hive.mapred.mode = strict;
+SELECT key FROM srcpart WHERE ds = '2008-04-08' or ds != '2008-04-08' or key>20;
\ No newline at end of file
Index: ql/src/test/queries/clientpositive/partition_predicate.q
===================================================================
--- ql/src/test/queries/clientpositive/partition_predicate.q	(revision 0)
+++ ql/src/test/queries/clientpositive/partition_predicate.q	(revision 0)
@@ -0,0 +1,15 @@
+SET hive.mapred.mode = strict;
+EXPLAIN
+SELECT key FROM srcpart WHERE ds = '2008-04-08' and (key>20 or key<20);
+
+SELECT key FROM srcpart WHERE ds = '2008-04-08' and (key>20 or key<20) limit 10;
+
+EXPLAIN
+SELECT key FROM srcpart WHERE NOT(ds = '2008-04-08' or key>20 );
+
+SELECT key FROM srcpart WHERE NOT(ds = '2008-04-08' or key>20 ) limit 10;
+
+EXPLAIN
+SELECT key FROM srcpart WHERE ds != '2008-04-08' and key>20;
+
+SELECT key FROM srcpart WHERE ds != '2008-04-08' and key>20 limit 10;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java	(working copy)
@@ -55,7 +55,6 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 
@@ -129,8 +128,9 @@
       boolean tableSorted = true;
       QBJoinTree joinCxt = this.pGraphContext.getMapJoinContext()
           .get(mapJoinOp);
-      if (joinCxt == null)
+      if (joinCxt == null) {
         return null;
+      }
       String[] srcs = joinCxt.getBaseSrc();
       int pos = 0;
       for (String src : srcs) {
@@ -140,7 +140,7 @@
       }
       if (!tableSorted) {
         //this is a mapjoin but not suit for a sort merge bucket map join. check outer joins
-        MapJoinProcessor.checkMapJoin(((MapJoinOperator) nd).getConf().getPosBigTable(),
+        MapJoinProcessor.checkMapJoin(((MapJoinOperator) nd).getConf().getPosBigTable(), 
             ((MapJoinOperator) nd).getConf().getConds());
         return null;
       }
@@ -160,7 +160,7 @@
         tagToAlias.put((byte) i, srcs[i]);
       }
       smbJoinDesc.setTagToAlias(tagToAlias);
-      
+
       int indexInListMapJoinNoReducer = this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp);
       if(indexInListMapJoinNoReducer >= 0 ) {
         this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer);
@@ -191,8 +191,9 @@
       Map topToTable = this.pGraphContext
           .getTopToTable();
       TableScanOperator tso = (TableScanOperator) topOps.get(alias);
-      if (tso == null)
+      if (tso == null) {
         return false;
+      }
 
       List keys = op.getConf().getKeys().get((byte) pos);
       // get all join columns from join keys stored in MapJoinDesc
@@ -217,9 +218,9 @@
       if (tbl.isPartitioned()) {
         PrunedPartitionList prunedParts = null;
         try {
-          prunedParts = PartitionPruner.prune(tbl, pGraphContext
-              .getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
-              pGraphContext.getPrunedPartitions());
+          prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso),
+              pGraphContext.getOpToNullPartPruner().get(tso),
+              pGraphContext.getConf(), alias, pGraphContext.getPrunedPartitions(), pGraphContext);
         } catch (HiveException e) {
           LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
           throw new SemanticException(e.getMessage(), e);
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java	(working copy)
@@ -31,6 +31,8 @@
    */
   String tabAlias;
 
+  boolean probParitionPredicate = false;
+
   /**
    * Flag to hold whether there are any non partition columns accessed in the
    * expression.
@@ -57,4 +59,12 @@
   public void setHasNonPartCols(boolean val) {
     hasNonPartCols = val;
   }
+
+  public boolean getProbParitionPredicate() {
+    return probParitionPredicate;
+  }
+
+  public void setProbParitionPredicate(boolean probParitionPredicate) {
+    this.probParitionPredicate = probParitionPredicate;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java	(working copy)
@@ -29,7 +29,7 @@
  */
 public class OpWalkerCtx implements NodeProcessorCtx {
 
-  private boolean hasNonPartCols;
+  private final HashMap hasNonPartCols;
 
   /**
    * Map from tablescan operator to partition pruning predicate that is
@@ -38,22 +38,30 @@
   private final HashMap opToPartPruner;
 
   /**
+   * Map from tablescan operator to partition pruning predicate that is
+   * used to probe if the partition predicate has effect or not.
+   */
+  private final HashMap opToNullPartPruner;
+
+  /**
    * Constructor.
   */
-  public OpWalkerCtx(HashMap opToPartPruner) {
+  public OpWalkerCtx(HashMap opToPartPruner,
+      HashMap opToPartProbePruner, HashMap hasNonPartCols) {
     this.opToPartPruner = opToPartPruner;
-    hasNonPartCols = false;
+    this.hasNonPartCols = hasNonPartCols;
+    opToNullPartPruner = opToPartProbePruner;
   }
 
   public HashMap getOpToPartPruner() {
     return opToPartPruner;
   }
 
-  public void addHasNonPartCols(boolean val) {
-    hasNonPartCols = (hasNonPartCols || val);
+  public HashMap getOpToNullPartPruner() {
+    return opToNullPartPruner;
   }
 
-  public boolean getHasNonPartCols() {
+  public HashMap getHasNonPartCols() {
     return hasNonPartCols;
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java	(working copy)
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.optimizer.ppr;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -59,7 +60,7 @@
 
 /**
  * The transformation step that does partition pruning.
- * 
+ *
  */
 public class PartitionPruner implements Transform {
 
@@ -69,7 +70,7 @@
 
   /*
    * (non-Javadoc)
-   * 
+   *
    * @see
    * org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop
    * .hive.ql.parse.ParseContext)
@@ -78,7 +79,8 @@
   public ParseContext transform(ParseContext pctx) throws SemanticException {
 
     // create a the context for walking operators
-    OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner());
+    HashMap hasNonPartCols = pctx.getHasNonPartCols();
+    OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner(), pctx.getOpToNullPartPruner(), hasNonPartCols);
 
     Map opRules = new LinkedHashMap();
     opRules.put(new RuleRegExp("R1", "(TS%FIL%)|(TS%FIL%FIL%)"), OpProcFactory
@@ -103,7 +105,7 @@
   * Find out whether the condition only contains partitioned columns. Note that
   * if the table is not partitioned, the function always returns true.
   * condition.
-   * 
+   *
   * @param tab
   *          the table object
   * @param expr
@@ -142,7 +144,7 @@
   /**
   * Get the partition list for the table that satisfies the partition pruner
   * condition.
-   * 
+   *
   * @param tab
   *          the table object for the alias
   * @param prunerExpr
@@ -151,13 +153,14 @@
   *          for checking whether "strict" mode is on.
   * @param alias
   *          for generating error message only.
+   * @param pGraphContext
   * @return the partition list for the table that satisfies the partition
   *         pruner condition.
   * @throws HiveException
   */
-  public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
+  public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,ExprNodeDesc nullPartPrunerExpr,
       HiveConf conf, String alias,
-      Map prunedPartitionsMap) throws HiveException {
+      Map prunedPartitionsMap, ParseContext pGraphContext) throws HiveException {
     LOG.trace("Started pruning partiton");
     LOG.trace("tabname = " + tab.getTableName());
     LOG.trace("prune Expression = " + prunerExpr);
@@ -179,6 +182,7 @@
       StructObjectInspector rowObjectInspector = (StructObjectInspector) tab
          .getDeserializer().getObjectInspector();
       Object[] rowWithPart = new Object[2];
+      Object[] rowWithNullPart = new Object[2];
 
       if (tab.isPartitioned()) {
         for (String partName : Hive.get().getPartitionNames(tab.getDbName(),
@@ -220,8 +224,14 @@
 
           // evaluate the expression tree
           if (prunerExpr != null) {
+            if("strict".equalsIgnoreCase(HiveConf.getVar(conf,
+                HiveConf.ConfVars.HIVEMAPREDMODE))) {
+              probePartitionPredicateEffect(tab, prunerExpr, nullPartPrunerExpr,
+                  rowWithNullPart, rowWithPartObjectInspector, alias, pGraphContext);
+            }
+
             ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory
-                .get(prunerExpr);
+                .get(prunerExpr);
             ObjectInspector evaluateResultOI = evaluator
                 .initialize(rowWithPartObjectInspector);
             Object evaluateResultO = evaluator.evaluate(rowWithPart);
@@ -267,6 +277,28 @@
     return ret;
   }
 
+  private static void probePartitionPredicateEffect(Table tab,
+      ExprNodeDesc prunerExpr, ExprNodeDesc nullPartPrunerExpr,
+      Object[] rowWithNullPart, StructObjectInspector rowWithPartObjectInspector, String alias, ParseContext pGraphContext)
+      throws HiveException {
+    if (pGraphContext.getHasNonPartCols() != null
+        && pGraphContext.getHasNonPartCols().get(prunerExpr) != null
+        && pGraphContext.getHasNonPartCols().get(prunerExpr)) {
+      ExprNodeEvaluator probeEvaluator = ExprNodeEvaluatorFactory
+          .get(nullPartPrunerExpr);
+      ObjectInspector evaluateResultOI = probeEvaluator
+          .initialize(rowWithPartObjectInspector);
+      Object evaluateResultO = probeEvaluator.evaluate(rowWithNullPart);
+      Boolean r = (Boolean) ((PrimitiveObjectInspector) evaluateResultOI)
+          .getPrimitiveJavaObject(evaluateResultO);
+      if (Boolean.TRUE.equals(r)) {
+        throw new SemanticException(ErrorMsg.PARTITION_PREDICATE_NO_EFFECT
+            .getMsg("for Alias \"" + alias + "\" Table \""
+                + tab.getTableName() + "\""));
+      }
+    }
+  }
+
   /**
   * Whether the expression contains a column node or not.
   */
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java	(working copy)
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.ppr.OpProcFactory.FilterPPR.ExprPrunerResult;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -42,6 +43,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 /**
  * Expression processor factory for partition pruning. Each processor tries to
@@ -69,7 +71,11 @@
       ExprProcCtx epc = (ExprProcCtx) procCtx;
       if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias())
           && cd.getIsParititonCol()) {
-        newcd = cd.clone();
+        if(!epc.probParitionPredicate) {
+          newcd = cd.clone();
+        } else {
+          newcd = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
+        }
       } else {
         newcd = new ExprNodeConstantDesc(cd.getTypeInfo(), null);
         epc.setHasNonPartCols(true);
@@ -93,8 +99,10 @@
 
       ExprNodeDesc newfd = null;
       ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+      ExprProcCtx epc = (ExprProcCtx) procCtx;
 
       boolean unknown = false;
+      boolean okToGenProbe = true;
 
       if (FunctionRegistry.isOpAndOrNot(fd)) {
         // do nothing because "And" and "Or" and "Not" supports null value
@@ -104,9 +112,11 @@
         // values) should derive from a common base class UDFNullAsUnknown, so
         // instead of listing the classes
         // here we would test whether a class is derived from that base class.
+        okToGenProbe = false;
       } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
         // If it's a non-deterministic UDF, set unknown to true
         unknown = true;
+        okToGenProbe = false;
       } else {
         // If any child is null, set unknown to true
         for (Object child : nodeOutputs) {
@@ -118,6 +128,15 @@
         }
       }
 
+      if (epc.probParitionPredicate && okToGenProbe) {
+        if(unknown) {
+          newfd = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
+        } else {
+          newfd = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
+        }
+        return newfd;
+      }
+
       if (unknown) {
         newfd = new ExprNodeConstantDesc(fd.getTypeInfo(), null);
       } else {
@@ -211,7 +230,7 @@
 
   /**
   * Generates the partition pruner for the expression tree.
-   * 
+   *
   * @param tabAlias
   *          The table alias of the partition table that is being considered
   *          for pruning
@@ -222,11 +241,28 @@
   *          has a non partition column
   * @throws SemanticException
   */
-  public static ExprNodeDesc genPruner(String tabAlias, ExprNodeDesc pred,
-      boolean hasNonPartCols) throws SemanticException {
-    // Create the walker, the rules dispatcher and the context.
+  public static ExprPrunerResult genPruner(String tabAlias, ExprNodeDesc pred) throws SemanticException {
+
+    ExprPrunerResult ret = new ExprPrunerResult();
+    ExprProcCtx pprCtx = new ExprProcCtx(tabAlias);
+    HashMap outputMap = new HashMap();
+    genPruner(pred, pprCtx, outputMap); // gen the real partition predicate
+    ret.ppr_pred = (ExprNodeDesc) outputMap.get(pred);
+    ret.hasNonPartCols = pprCtx.getHasNonPartCols();
+    pprCtx = new ExprProcCtx(tabAlias);
+    pprCtx.probParitionPredicate = true;
+    outputMap = new HashMap();
+    genPruner(pred, pprCtx, outputMap); // gen the probe partition predicate
+    ret.ppr_prob_pred = (ExprNodeDesc) outputMap.get(pred);
+    // Get the exprNodeDesc corresponding to the first start node;
+    return ret;
+  }
+
+  //Create the walker, the rules dispatcher and the context.
+  private static void genPruner(ExprNodeDesc pred, ExprProcCtx pprCtx,
+      HashMap outputMap) throws SemanticException {
     // create a walker which walks the tree in a DFS manner while maintaining
     // the operator stack. The dispatcher
     // generates the plan from the operator tree
@@ -248,13 +284,7 @@
 
     List startNodes = new ArrayList();
     startNodes.add(pred);
-
-    HashMap outputMap = new HashMap();
     egw.startWalking(startNodes, outputMap);
-    hasNonPartCols = pprCtx.getHasNonPartCols();
-
-    // Get the exprNodeDesc corresponding to the first start node;
-    return (ExprNodeDesc) outputMap.get(pred);
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java	(working copy)
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.optimizer.ppr;
 
+import java.util.HashMap;
 import java.util.Map;
 import java.util.Stack;
 
@@ -86,32 +87,59 @@
       String alias = top.getConf().getAlias();
 
       // Generate the partition pruning predicate
-      boolean hasNonPartCols = false;
-      ExprNodeDesc ppr_pred = ExprProcFactory.genPruner(alias, predicate,
-          hasNonPartCols);
-      owc.addHasNonPartCols(hasNonPartCols);
+      ExprPrunerResult ppr_pred = ExprProcFactory.genPruner(alias, predicate);
 
       // Add the pruning predicate to the table scan operator
-      addPruningPred(owc.getOpToPartPruner(), top, ppr_pred);
+      addPruningPred(top, owc.getOpToPartPruner(), ppr_pred.ppr_pred,
+          owc.getOpToNullPartPruner(), ppr_pred.ppr_prob_pred, owc.getHasNonPartCols(), ppr_pred.hasNonPartCols);
 
       return null;
     }
 
-    private void addPruningPred(Map opToPPR,
-        TableScanOperator top, ExprNodeDesc new_ppr_pred) throws UDFArgumentException {
+    static class ExprPrunerResult {
+      ExprNodeDesc ppr_pred;
+      ExprNodeDesc ppr_prob_pred; // used to probe to see if the partition predicate has effect or not.
+      boolean hasNonPartCols = false;
+    }
+
+    private void addPruningPred(TableScanOperator top,
+        Map opToPPR,
+        ExprNodeDesc new_ppr_pred,
+        Map opToNullPPR,
+        ExprNodeDesc pprProbPred, HashMap hasNonPartColsMap, boolean hasNonPartCols) throws UDFArgumentException {
       ExprNodeDesc old_ppr_pred = opToPPR.get(top);
       ExprNodeDesc ppr_pred = null;
       if (old_ppr_pred != null) {
         // or the old_ppr_pred and the new_ppr_pred
         ppr_pred = TypeCheckProcFactory.DefaultExprProcessor
             .getFuncExprNodeDesc("OR", old_ppr_pred, new_ppr_pred);
+        if (hasNonPartColsMap.get(old_ppr_pred) != null) {
+          hasNonPartCols = hasNonPartCols || hasNonPartColsMap.get(old_ppr_pred);
+        }
       } else {
         ppr_pred = new_ppr_pred;
       }
 
+      if(hasNonPartColsMap != null) {
+        hasNonPartColsMap.put(ppr_pred, hasNonPartCols);
+      }
+
       // Put the mapping from table scan operator to ppr_pred
       opToPPR.put(top, ppr_pred);
 
+      ExprNodeDesc old_ppr_null_pred = opToNullPPR.get(top);
+      ExprNodeDesc ppr_null_pred = null;
+      if (old_ppr_null_pred != null) {
+        // or the old_ppr_pred and the new_ppr_pred
+        ppr_null_pred = TypeCheckProcFactory.DefaultExprProcessor
+            .getFuncExprNodeDesc("OR", old_ppr_null_pred, pprProbPred);
+      } else {
+        ppr_null_pred = pprProbPred;
+      }
+
+      // Put the mapping from table scan operator to ppr_pred
+      opToNullPPR.put(top, ppr_null_pred);
+
       return;
     }
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(working copy)
@@ -521,8 +521,8 @@
 
       try {
         partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
-            parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
-            alias_id, parseCtx.getPrunedPartitions());
+            parseCtx.getOpToPartPruner().get(topOp), parseCtx.getOpToNullPartPruner().get(topOp),
+            opProcCtx.getConf(), alias_id, parseCtx.getPrunedPartitions(), parseCtx);
       } catch (SemanticException e) {
         throw e;
       } catch (HiveException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java	(working copy)
@@ -180,7 +180,7 @@
       if (topOp == null || (!(topOp instanceof TableScanOperator))) {
         // this is in a sub-query.
         // In future, we need to infer subq's columns propery. For example
-        // "select key, count(1) 
+        // "select key, count(1)
         // from (from clustergroupbyselect key, value where ds='210') group by key, 3;",
         // even though the group by op is in a subquery, it can be changed to
         // bucket groupby.
@@ -204,8 +204,8 @@
       PrunedPartitionList partsList = null;
       try {
         partsList = PartitionPruner.prune(destTable, pGraphContext
-            .getOpToPartPruner().get(ts), pGraphContext.getConf(), table,
-            pGraphContext.getPrunedPartitions());
+            .getOpToPartPruner().get(ts), pGraphContext.getOpToNullPartPruner().get(ts),
+            pGraphContext.getConf(), table, pGraphContext.getPrunedPartitions(), pGraphContext);
       } catch (HiveException e) {
         // Has to use full name to make sure it does not conflict with
         // org.apache.commons.lang.StringUtils
@@ -233,14 +233,14 @@
     /**
     * Given the group by keys, bucket columns, sort column, this method
     * determines if we can use sorted group by or not.
-     * 
+     *
     * We use bucket columns only when the sorted column set is empty and if all
     * group by columns are contained in bucket columns.
-     * 
+     *
     * If we can can not determine by looking at bucketed columns and the table
     * has sort columns, we resort to sort columns. We can use bucket group by
     * if the groupby column set is an exact prefix match of sort columns.
-     * 
+     *
     * @param groupByCols
     * @param bucketCols
     * @param sortCols
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java	(working copy)
@@ -67,7 +67,7 @@
 *this transformation does bucket map join optimization.
 */
 public class BucketMapJoinOptimizer implements Transform {
-  
+
   private static final Log LOG = LogFactory.getLog(GroupByOptimizer.class
       .getName());
 
@@ -130,11 +130,11 @@
       }
     };
   }
-  
+
   class BucketMapjoinOptProc implements NodeProcessor {
-    
+
     protected ParseContext pGraphContext;
-    
+
     public BucketMapjoinOptProc(ParseContext pGraphContext) {
       super();
       this.pGraphContext = pGraphContext;
@@ -146,13 +146,15 @@
       MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
       BucketMapjoinOptProcCtx context = (BucketMapjoinOptProcCtx) procCtx;
 
-      if(context.getListOfRejectedMapjoins().contains(mapJoinOp))
+      if(context.getListOfRejectedMapjoins().contains(mapJoinOp)) {
         return null;
-      
+      }
+
       QBJoinTree joinCxt = this.pGraphContext.getMapJoinContext().get(mapJoinOp);
-      if(joinCxt == null)
+      if(joinCxt == null) {
         return null;
-      
+      }
+
       List joinAliases = new ArrayList();
       String[] srcs = joinCxt.getBaseSrc();
       String[] left = joinCxt.getLeftAliases();
@@ -174,7 +176,7 @@
           }
         }
       }
-      
+
       MapJoinDesc mjDecs = mapJoinOp.getConf();
       LinkedHashMap aliasToBucketNumberMapping = new LinkedHashMap();
       LinkedHashMap> aliasToBucketFileNamesMapping = new LinkedHashMap>();
@@ -183,23 +185,24 @@
       // with only one partition presents in each join source tables.
       Map> topOps = this.pGraphContext.getTopOps();
       Map topToTable = this.pGraphContext.getTopToTable();
-      
+
       // (partition to bucket file names) and (partition to bucket number) for
       // the big table;
       LinkedHashMap> bigTblPartsToBucketFileNames = new LinkedHashMap>();
       LinkedHashMap bigTblPartsToBucketNumber = new LinkedHashMap();
-      
+
       for (int index = 0; index < joinAliases.size(); index++) {
         String alias = joinAliases.get(index);
         TableScanOperator tso = (TableScanOperator) topOps.get(alias);
-        if (tso == null)
+        if (tso == null) {
           return null;
+        }
         Table tbl = topToTable.get(tso);
         if(tbl.isPartitioned()) {
           PrunedPartitionList prunedParts = null;
           try {
-            prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
-                pGraphContext.getPrunedPartitions());
+            prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getOpToNullPartPruner().get(tso),
+                pGraphContext.getConf(), alias, pGraphContext.getPrunedPartitions(), pGraphContext);
           } catch (HiveException e) {
             // Has to use full name to make sure it does not conflict with
             // org.apache.commons.lang.StringUtils
@@ -208,9 +211,9 @@
           }
           int partNumber = prunedParts.getConfirmedPartns().size()
               + prunedParts.getUnknownPartns().size();
-          
+
           if (partNumber > 1) {
-            // only allow one partition for small tables
+            // only allow one partition for small tables
             if(alias != baseBigAlias) {
               return null;
             }
@@ -243,7 +246,7 @@
             // not contain mappings for the big table. Instead, the mappings are
             // contained in bigTblPartsToBucketFileNames and
             // bigTblPartsToBucketNumber
-            
+
           } else {
             Partition part = null;
             Iterator iter = prunedParts.getConfirmedPartns()
                 .iterator();
@@ -267,8 +270,9 @@
             }
           }
         } else {
-          if (!checkBucketColumns(tbl.getBucketCols(), mjDecs, index))
+          if (!checkBucketColumns(tbl.getBucketCols(), mjDecs, index)) {
             return null;
+          }
           Integer num = new Integer(tbl.getNumBuckets());
           aliasToBucketNumberMapping.put(alias, num);
           List fileNames = new ArrayList();
@@ -286,7 +290,7 @@
           aliasToBucketFileNamesMapping.put(alias, fileNames);
         }
       }
-      
+
       // All tables or partitions are bucketed, and their bucket number is
       // stored in 'bucketNumbers', we need to check if the number of buckets in
       // the big table can be divided by no of buckets in small tables.
@@ -307,32 +311,33 @@
           return null;
         }
       }
-      
+
       MapJoinDesc desc = mapJoinOp.getConf();
-      
-      LinkedHashMap>> aliasBucketFileNameMapping = 
+
+      LinkedHashMap>> aliasBucketFileNameMapping =
         new LinkedHashMap>>();
-      
-      //sort bucket names for the big table
+
+      //sort bucket names for the big table
       if(bigTblPartsToBucketNumber.size() > 0) {
         Collection> bucketNamesAllParts = bigTblPartsToBucketFileNames.values();
         for(List partBucketNames : bucketNamesAllParts) {
           Collections.sort(partBucketNames);
         }
       } else {
-        Collections.sort(aliasToBucketFileNamesMapping.get(baseBigAlias)); 
+        Collections.sort(aliasToBucketFileNamesMapping.get(baseBigAlias));
       }
-      
+
       // go through all small tables and get the mapping from bucket file name
-      // in the big table to bucket file names in small tables.  
+      // in the big table to bucket file names in small tables.
       for (int j = 0; j < joinAliases.size(); j++) {
         String alias = joinAliases.get(j);
-        if(alias.equals(baseBigAlias))
+        if(alias.equals(baseBigAlias)) {
           continue;
+        }
         Collections.sort(aliasToBucketFileNamesMapping.get(alias));
         LinkedHashMap> mapping = new LinkedHashMap>();
         aliasBucketFileNameMapping.put(alias, mapping);
-        
+
         // for each bucket file in big table, get the corresponding bucket file
         // name in the small table.
        if (bigTblPartsToBucketNumber.size() > 0) {
@@ -400,8 +405,9 @@
         int nxt = iter.next().intValue();
         boolean ok = (nxt >= bucketNumberInPart) ? nxt % bucketNumberInPart == 0
             : bucketNumberInPart % nxt == 0;
-        if(!ok)
+        if(!ok) {
           return false;
+        }
       }
       return true;
     }
@@ -423,12 +429,13 @@
       }
       return fileNames;
     }
-    
+
     private boolean checkBucketColumns(List bucketColumns, MapJoinDesc mjDesc, int index) {
       List keys = mjDesc.getKeys().get((byte)index);
-      if (keys == null || bucketColumns == null || bucketColumns.size() == 0)
+      if (keys == null || bucketColumns == null || bucketColumns.size() == 0) {
         return false;
-      
+      }
+
       //get all join columns from join keys stored in MapJoinDesc
       List joinCols = new ArrayList();
       List joinKeys = new ArrayList();
@@ -450,30 +457,31 @@
       }
 
       // to see if the join columns from a table is exactly this same as its
-      // bucket columns
+      // bucket columns
       if (joinCols.size() == 0 || joinCols.size() != bucketColumns.size()) {
         return false;
       }
-      
+
       for (String col : joinCols) {
-        if (!bucketColumns.contains(col))
+        if (!bucketColumns.contains(col)) {
           return false;
+        }
       }
-      
+
       return true;
     }
-    
+
   }
-  
+
   class BucketMapjoinOptProcCtx implements NodeProcessorCtx {
     // we only convert map joins that follows a root table scan in the same
     // mapper. That means there is no reducer between the root table scan and
     // mapjoin.
     Set listOfRejectedMapjoins = new HashSet();
-    
+
     public Set getListOfRejectedMapjoins() {
       return listOfRejectedMapjoins;
     }
-    
+
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java	(working copy)
@@ -78,6 +78,7 @@
   SAMPLE_RESTRICTION("Cannot Sample on More Than Two Columns"),
   SAMPLE_COLUMN_NOT_FOUND("Sample Column Not Found"),
   NO_PARTITION_PREDICATE("No Partition Predicate Found"),
+  PARTITION_PREDICATE_NO_EFFECT("Partition predicate has no effect. Please check the query or set to nonstrict mode"),
   INVALID_DOT(". operator is only supported on struct or list of struct types"),
   INVALID_TBL_DDL_SERDE("Either list of columns or a custom serializer should be specified"),
   TARGET_TABLE_COLUMN_MISMATCH(
@@ -168,7 +169,7 @@
   * is not found or ErrorMsg has no SQLState, returns
   * the SQLState bound to the GENERIC_ERROR
   * ErrorMsg.
-   * 
+   *
   * @param mesg
   *          An error message string
   * @return SQLState
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -54,7 +54,6 @@
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
@@ -164,6 +163,7 @@
 public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   private HashMap opToPartPruner;
+  private HashMap opToNullPartPruner;
   private HashMap> topOps;
   private HashMap> topSelOps;
   private LinkedHashMap, OpParseContext> opParseCtx;
@@ -183,6 +183,7 @@
   private CreateViewDesc createVwDesc;
   private ASTNode viewSelect;
   private final UnparseTranslator unparseTranslator;
+  private HashMap hasNonPartCols;
 
   private static class Phase1Ctx {
     String dest;
@@ -194,6 +195,8 @@
     super(conf);
 
     opToPartPruner = new HashMap();
+    opToNullPartPruner = new HashMap();
+    hasNonPartCols = new HashMap();
     opToSamplePruner = new HashMap();
     topOps = new HashMap>();
     topSelOps = new HashMap>();
@@ -230,6 +233,8 @@
 
   public void init(ParseContext pctx) {
     opToPartPruner = pctx.getOpToPartPruner();
+    opToNullPartPruner = pctx.getOpToNullPartPruner();
+    hasNonPartCols = pctx.getHasNonPartCols();
     opToSamplePruner = pctx.getOpToSamplePruner();
     topOps = pctx.getTopOps();
     topSelOps = pctx.getTopSelOps();
@@ -248,7 +253,7 @@
   }
 
   public ParseContext getParseContext() {
-    return new ParseContext(conf, qb, ast, opToPartPruner, topOps, topSelOps,
+    return new ParseContext(conf, qb, ast, opToPartPruner,opToNullPartPruner, hasNonPartCols, topOps, topSelOps,
         opParseCtx, joinContext, topToTable, loadTableWork, loadFileWork,
         ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer,
         groupOpToInputTables, prunedPartitions, opToSamplePruner);
@@ -5542,8 +5547,8 @@
       PrunedPartitionList partsList = null;
       try {
         partsList = PartitionPruner.prune(topToTable.get(ts),
-            opToPartPruner.get(ts), conf, (String) topOps.keySet()
-            .toArray()[0], prunedPartitions);
+            opToPartPruner.get(ts), opToNullPartPruner.get(ts),
+            conf, (String) topOps.keySet().toArray()[0], prunedPartitions, this.getParseContext());
       } catch (HiveException e) {
         // Has to use full name to make sure it does not conflict with
         // org.apache.commons.lang.StringUtils
@@ -5946,7 +5951,7 @@
       return;
     }
 
-    ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner,
+    ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, opToNullPartPruner, hasNonPartCols,
         topOps, topSelOps, opParseCtx, joinContext, topToTable, loadTableWork,
         loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java	(revision 924512)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java	(working copy)
@@ -48,13 +48,14 @@
 * populated. Note that since the parse context contains the operator tree, it
 * can be easily retrieved by the next optimization step or finally for task
 * generation after the plan has been completely optimized.
- * 
+ *
 **/
 
 public class ParseContext {
   private QB qb;
   private ASTNode ast;
   private HashMap opToPartPruner;
+  private HashMap opToNullPartPruner;
   private HashMap opToSamplePruner;
   private HashMap> topOps;
   private HashMap> topSelOps;
@@ -82,7 +83,7 @@
   // partitioning columns - the partitions are identified and streamed directly
   // to the client without requiring
   // a map-reduce job
-  private boolean hasNonPartCols;
+  private HashMap hasNonPartCols;
 
   public ParseContext() {
   }
@@ -94,6 +95,8 @@
   *          current parse tree
   * @param opToPartPruner
   *          map from table scan operator to partition pruner
+   * @param hasNonPartCols
+   * @param opToNullPartPruner2
   * @param topOps
   *          list of operators for the top query
   * @param topSelOps
@@ -124,7 +127,7 @@
       QB qb,
       ASTNode ast,
       HashMap opToPartPruner,
-      HashMap> topOps,
+      HashMap opToNullPartPruner, HashMap hasNonPartCols, HashMap> topOps,
       HashMap> topSelOps,
       LinkedHashMap, OpParseContext> opParseCtx,
       Map joinContext,
@@ -139,6 +142,7 @@
     this.qb = qb;
     this.ast = ast;
     this.opToPartPruner = opToPartPruner;
+    this.opToNullPartPruner = opToNullPartPruner;
     this.joinContext = joinContext;
     this.topToTable = topToTable;
     this.loadFileWork = loadFileWork;
@@ -151,7 +155,7 @@
     this.destTableId = destTableId;
     this.uCtx = uCtx;
     this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer;
-    hasNonPartCols = false;
+    this.hasNonPartCols = hasNonPartCols;
     this.groupOpToInputTables = new HashMap>();
     this.groupOpToInputTables = groupOpToInputTables;
     this.prunedPartitions = prunedPartitions;
@@ -381,23 +385,8 @@
     this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer;
   }
 
-  /**
-   * Sets the hasNonPartCols flag.
-   * 
-   * @param val
-   */
-  public void setHasNonPartCols(boolean val) {
-    hasNonPartCols = val;
-  }
 
   /**
-   * Gets the value of the hasNonPartCols flag.
-   */
-  public boolean getHasNonPartCols() {
-    return hasNonPartCols;
-  }
-
-  /**
   * @return the opToSamplePruner
   */
   public HashMap getOpToSamplePruner() {
@@ -450,4 +439,21 @@
   public void setMapJoinContext(Map mapJoinContext) {
     this.mapJoinContext = mapJoinContext;
   }
+
+  public HashMap getOpToNullPartPruner() {
+    return opToNullPartPruner;
+  }
+
+  public void setOpToNullPartPruner(
+      HashMap opToNullPartPruner) {
+    this.opToNullPartPruner = opToNullPartPruner;
+  }
+
+  public HashMap getHasNonPartCols() {
+    return hasNonPartCols;
+  }
+
+  public void setHasNonPartCols(HashMap hasNonPartCols) {
+    this.hasNonPartCols = hasNonPartCols;
+  }
 }