Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java (revision 0)
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.Stack;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.ppr.ExprProcCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+
+/**
+ * Expression processor factory for pruning. Each processor tries to
+ * convert the expression subtree into a pruning expression.
+ *
+ * It can be used by both the partition pruner and the list bucketing pruner.
+ */
+public abstract class PrunerExpressionOperatorFactory {
+
+  /**
+   * If all children are candidates and refer only to one table alias then this
+   * expr is a candidate, else it is not a candidate but its children could be
+   * final candidates.
+   */
+  public static class GenericFuncExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      ExprNodeDesc newfd = null;
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+
+      boolean unknown = false;
+
+      if (FunctionRegistry.isOpAndOrNot(fd)) {
+        // Do nothing, because "and", "or" and "not" support null-value
+        // evaluation.
+        // NOTE: In the future all UDFs that treat null values as UNKNOWN
+        // (both in parameters and return values) should derive from a common
+        // base class UDFNullAsUnknown, so instead of listing the classes here
+        // we would test whether a class is derived from that base class.
+        // If all children are null, set unknown to true.
+        boolean isAllNull = true;
+        for (Object child : nodeOutputs) {
+          ExprNodeDesc child_nd = (ExprNodeDesc) child;
+          if (!(child_nd instanceof ExprNodeConstantDesc
+              && ((ExprNodeConstantDesc) child_nd).getValue() == null)) {
+            isAllNull = false;
+          }
+        }
+        unknown = isAllNull;
+      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
+        // If it's a non-deterministic UDF, set unknown to true.
+        unknown = true;
+      } else {
+        // If any child is null, set unknown to true.
+        for (Object child : nodeOutputs) {
+          ExprNodeDesc child_nd = (ExprNodeDesc) child;
+          if (child_nd instanceof ExprNodeConstantDesc
+              && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
+            unknown = true;
+          }
+        }
+      }
+
+      if (unknown) {
+        newfd = new ExprNodeConstantDesc(fd.getTypeInfo(), null);
+      } else {
+        // Create the list of children.
+        ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+        for (Object child : nodeOutputs) {
+          children.add((ExprNodeDesc) child);
+        }
+        // Create a copy of the function descriptor.
+        newfd = new ExprNodeGenericFuncDesc(fd.getTypeInfo(), fd
+            .getGenericUDF(), children);
+      }
+
+      return newfd;
+    }
+
+  }
+
+  /**
+   * Processor for field expressions.
+   */
+  public static class FieldExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      ExprNodeFieldDesc fnd = (ExprNodeFieldDesc) nd;
+      boolean unknown = false;
+      ExprNodeDesc left_nd = null;
+      for (Object child : nodeOutputs) {
+        ExprNodeDesc child_nd = (ExprNodeDesc) child;
+        if (child_nd instanceof ExprNodeConstantDesc
+            && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
+          unknown = true;
+        }
+        left_nd = child_nd;
+      }
+
+      ExprNodeDesc newnd = null;
+      if (unknown) {
+        newnd = new ExprNodeConstantDesc(fnd.getTypeInfo(), null);
+      } else {
+        newnd = new ExprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd
+            .getFieldName(), fnd.getIsList());
+      }
+      return newnd;
+    }
+
+  }
+
+  /**
+   * Processor for column expressions.
+   */
+  public static class ColumnExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      ExprNodeDesc newcd = null;
+      ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
+      /* Leave it for a future refactoring. */
+      if (procCtx instanceof ExprProcCtx) {
+        ExprProcCtx epc = (ExprProcCtx) procCtx;
+        if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias())
+            && cd.getIsPartitionColOrVirtualCol()) {
+          newcd = cd.clone();
+        } else {
+          newcd = new ExprNodeConstantDesc(cd.getTypeInfo(), null);
+          epc.setHasNonPartCols(true);
+        }
+      } else {
+        // We should never get here.
+        throw new NotImplementedException("");
+      }
+
+      return newcd;
+    }
+
+  }
+
+  /**
+   * Processor for constants and null expressions. For such expressions the
+   * processor simply clones the exprNodeDesc and returns it.
+   */
+  public static class DefaultExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      if (nd instanceof ExprNodeConstantDesc) {
+        return ((ExprNodeConstantDesc) nd).clone();
+      } else if (nd instanceof ExprNodeNullDesc) {
+        return ((ExprNodeNullDesc) nd).clone();
+      }
+
+      assert (false);
+      return null;
+    }
+  }
+
+  /**
+   * Instantiate the default expression processor.
+   *
+   * @return the default expression processor
+   */
+  public static NodeProcessor getDefaultExprProcessor() {
+    return new DefaultExprProcessor();
+  }
+
+  /**
+   * Instantiate the generic function processor.
+   *
+   * @return the generic function processor
+   */
+  public static NodeProcessor getGenericFuncProcessor() {
+    return new GenericFuncExprProcessor();
+  }
+
+  /**
+   * Instantiate the field processor.
+   *
+   * @return the field processor
+   */
+  public static NodeProcessor getFieldProcessor() {
+    return new FieldExprProcessor();
+  }
+
+  /**
+   * Instantiate the column processor.
+   *
+   * @return the column processor
+   */
+  public static NodeProcessor getColumnProcessor() {
+    return new ColumnExprProcessor();
+  }
+
+}
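The class javadoc above says this factory can serve both the partition pruner and the list bucketing pruner. A minimal sketch of that reuse, assuming a hypothetical subclass name (ListBucketingExprProcFactory) and using the PrunerUtils.walkExprTree helper added later in this patch:

package org.apache.hadoop.hive.ql.optimizer;

import java.util.HashMap;

import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

/**
 * Hypothetical pruner-specific factory: it inherits all four processors and
 * only needs to exist so the pruner has a place to override behavior later.
 */
public final class ListBucketingExprProcFactory extends PrunerExpressionOperatorFactory {

  private ListBucketingExprProcFactory() {
    // prevent instantiation
  }

  /**
   * Fold a filter predicate into a pruning expression: columns the context
   * does not recognize become null constants, and deterministic functions
   * over recognized columns are copied as-is.
   */
  public static ExprNodeDesc foldPredicate(ExprNodeDesc pred, NodeProcessorCtx ctx)
      throws SemanticException {
    HashMap<Node, Object> outputs = PrunerUtils.walkExprTree(pred, ctx,
        getColumnProcessor(), getFieldProcessor(),
        getGenericFuncProcessor(), getDefaultExprProcessor());
    // The walker records one output per visited node; the output for the
    // root predicate is the folded pruning expression.
    return (ExprNodeDesc) outputs.get(pred);
  }
}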
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java (revision 0)
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * Operator factory for the pruning walk of the operator graph. We find
+ * all the filter operators that appear just beneath the table scan operators.
+ * We then pass each filter to the pruner to construct a pruner for
+ * that table alias and store a mapping from the table scan operator to that
+ * pruner. We call that pruner later during plan generation.
+ *
+ * This class was extracted from org.apache.hadoop.hive.ql.optimizer.ppr.OpProcFactory
+ * so that, in addition to partition pruning (ppr), other pruners can reuse it.
+ */
+public abstract class PrunerOperatorFactory {
+
+  /**
+   * Determines the pruner for the filter. This is called only when
+   * the filter follows a table scan operator.
+   */
+  public static class Filter implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      FilterOperator fop = (FilterOperator) nd;
+      FilterOperator fop2 = null;
+
+      // The stack contains either ... TS, Filter or
+      // ... TS, Filter, Filter with the head of the stack being the rightmost
+      // symbol. So we just pop the two elements from the top, and if the
+      // second of them is not a table scan, then the operator on the top of
+      // the stack is the table scan operator.
+      Node tmp = stack.pop();
+      Node tmp2 = stack.pop();
+      TableScanOperator top = null;
+      if (tmp2 instanceof TableScanOperator) {
+        top = (TableScanOperator) tmp2;
+      } else {
+        top = (TableScanOperator) stack.peek();
+        fop2 = (FilterOperator) tmp2;
+      }
+      stack.push(tmp2);
+      stack.push(tmp);
+
+      // If fop2 exists (i.e. this is not the top-level filter) and fop2 is
+      // not a sampling filter, then we ignore the current filter.
+      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
+        return null;
+      }
+
+      // Ignore the predicate in case it is a sampling predicate.
+      if (fop.getConf().getIsSamplingPred()) {
+        return null;
+      }
+
+      generatePredicate(procCtx, fop, top);
+
+      return null;
+    }
+
+    /**
+     * Generate the pruning predicate. Subclasses must implement this.
+     *
+     * @param procCtx
+     * @param fop
+     * @param top
+     * @throws SemanticException
+     * @throws UDFArgumentException
+     */
+    protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
+        TableScanOperator top) throws SemanticException, UDFArgumentException {
+      // Subclasses must implement it.
+      throw new NotImplementedException("");
+    }
+
+    /**
+     * Add a pruning predicate.
+     *
+     * @param opToPruner
+     * @param top
+     * @param new_pruner_pred
+     * @throws UDFArgumentException
+     */
+    protected void addPruningPred(Map<TableScanOperator, ExprNodeDesc> opToPruner,
+        TableScanOperator top, ExprNodeDesc new_pruner_pred) throws UDFArgumentException {
+      ExprNodeDesc old_pruner_pred = opToPruner.get(top);
+      ExprNodeDesc pruner_pred = null;
+      if (old_pruner_pred != null) {
+        // OR the old_pruner_pred and the new_pruner_pred.
+        pruner_pred = TypeCheckProcFactory.DefaultExprProcessor
+            .getFuncExprNodeDesc("OR", old_pruner_pred, new_pruner_pred);
+      } else {
+        pruner_pred = new_pruner_pred;
+      }
+
+      // Put the mapping from table scan operator to pruner_pred.
+      opToPruner.put(top, pruner_pred);
+    }
+  }
+
+  /**
+   * Default processor which does nothing.
+   */
+  public static class Default implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      // Nothing needs to be done.
+      return null;
+    }
+  }
+
+  /**
+   * Instantiate the filter processor. Subclasses must implement this.
+   *
+   * @return the filter processor
+   */
+  public static NodeProcessor getFilterProc() {
+    // Subclasses must implement it.
+    throw new NotImplementedException("");
+  }
+
+  /**
+   * Instantiate the default processor.
+   *
+   * @return the default processor
+   */
+  public static NodeProcessor getDefaultProc() {
+    return new Default();
+  }
+
+}
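A sketch of how a concrete pruner plugs into this factory; MyPrunerOpProcFactory and MyFilter are hypothetical names and the body of generatePredicate is elided. FilterPPR in OpProcFactory, later in this patch, is the real in-tree example of the same pattern:

package org.apache.hadoop.hive.ql.optimizer;

import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;

/**
 * Hypothetical pruner: only generatePredicate has to be supplied; the stack
 * inspection and the sampling-predicate checks are inherited from Filter.
 */
public final class MyPrunerOpProcFactory extends PrunerOperatorFactory {

  public static class MyFilter extends Filter {
    @Override
    protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
        TableScanOperator top) throws SemanticException, UDFArgumentException {
      // Derive a pruning predicate from fop.getConf().getPredicate() and
      // record it against top, e.g. via the inherited addPruningPred(...).
    }
  }

  public static NodeProcessor getFilterProc() {
    return new MyFilter();
  }

  private MyPrunerOpProcFactory() {
    // prevent instantiation
  }
}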
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 1391606)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy)
@@ -37,22 +37,12 @@
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.PrunerUtils;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
@@ -91,25 +81,8 @@
     // create the context for walking operators
     OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner());
 
-    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    opRules.put(new RuleRegExp("R1",
-        "(" + TableScanOperator.getOperatorName() + "%"
-        + FilterOperator.getOperatorName() + "%)|("
-        + TableScanOperator.getOperatorName() + "%"
-        + FilterOperator.getOperatorName() + "%"
-        + FilterOperator.getOperatorName() + "%)"),
-        OpProcFactory.getFilterProc());
-
-    // The dispatcher fires the processor corresponding to the closest matching
-    // rule and passes the context along
-    Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(),
-        opRules, opWalkerCtx);
-    GraphWalker ogw = new DefaultGraphWalker(disp);
-
-    // Create a list of topop nodes
-    ArrayList<Node> topNodes = new ArrayList<Node>();
-    topNodes.addAll(pctx.getTopOps().values());
-    ogw.startWalking(topNodes, null);
+    PrunerUtils.walkOperatorTree(pctx, opWalkerCtx, OpProcFactory.getFilterProc(),
+        OpProcFactory.getDefaultProc());
 
     pctx.setHasNonPartCols(opWalkerCtx.getHasNonPartCols());
 
     return pctx;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (revision 1391606)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (working copy)
@@ -18,30 +18,13 @@
 
 package org.apache.hadoop.hive.ql.optimizer.ppr;
 
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
 
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.PrunerExpressionOperatorFactory;
+import org.apache.hadoop.hive.ql.optimizer.PrunerUtils;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
 
 /**
  * Expression processor factory for partition pruning. Each processor tries to
@@ -49,179 +32,15 @@
  * expression is then used to figure out whether a particular partition should
  * be scanned or not.
  */
-public final class ExprProcFactory {
+public final class ExprProcFactory extends PrunerExpressionOperatorFactory {
 
   private ExprProcFactory() {
     // prevent instantiation
   }
 
   /**
-   * Processor for column expressions.
-   */
-  public static class ColumnExprProcessor implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      ExprNodeDesc newcd = null;
-      ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
-      ExprProcCtx epc = (ExprProcCtx) procCtx;
-      if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias())
-          && cd.getIsPartitionColOrVirtualCol()) {
-        newcd = cd.clone();
-      } else {
-        newcd = new ExprNodeConstantDesc(cd.getTypeInfo(), null);
-        epc.setHasNonPartCols(true);
-      }
-
-      return newcd;
-    }
-
-  }
-
-  /**
-   * If all children are candidates and refer only to one table alias then this
-   * expr is a candidate else it is not a candidate but its children could be
-   * final candidates.
-   */
-  public static class GenericFuncExprProcessor implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      ExprNodeDesc newfd = null;
-      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
-
-      boolean unknown = false;
-
-      if (FunctionRegistry.isOpAndOrNot(fd)) {
-        // do nothing because "And" and "Or" and "Not" supports null value
-        // evaluation
-        // NOTE: In the future all UDFs that treats null value as UNKNOWN (both
-        // in parameters and return
-        // values) should derive from a common base class UDFNullAsUnknown, so
-        // instead of listing the classes
-        // here we would test whether a class is derived from that base class.
-        // If All childs are null, set unknown to true
-        boolean isAllNull = true;
-        for (Object child : nodeOutputs) {
-          ExprNodeDesc child_nd = (ExprNodeDesc) child;
-          if (!(child_nd instanceof ExprNodeConstantDesc
-              && ((ExprNodeConstantDesc) child_nd).getValue() == null)) {
-            isAllNull = false;
-          }
-        }
-        unknown = isAllNull;
-      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
-        // If it's a non-deterministic UDF, set unknown to true
-        unknown = true;
-      } else {
-        // If any child is null, set unknown to true
-        for (Object child : nodeOutputs) {
-          ExprNodeDesc child_nd = (ExprNodeDesc) child;
-          if (child_nd instanceof ExprNodeConstantDesc
-              && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
-            unknown = true;
-          }
-        }
-      }
-
-      if (unknown) {
-        newfd = new ExprNodeConstantDesc(fd.getTypeInfo(), null);
-      } else {
-        // Create the list of children
-        ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
-        for (Object child : nodeOutputs) {
-          children.add((ExprNodeDesc) child);
-        }
-        // Create a copy of the function descriptor
-        newfd = new ExprNodeGenericFuncDesc(fd.getTypeInfo(), fd
-            .getGenericUDF(), children);
-      }
-
-      return newfd;
-    }
-
-  }
-
-  /**
-   * FieldExprProcessor.
-   *
-   */
-  public static class FieldExprProcessor implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      ExprNodeFieldDesc fnd = (ExprNodeFieldDesc) nd;
-      boolean unknown = false;
-      int idx = 0;
-      ExprNodeDesc left_nd = null;
-      for (Object child : nodeOutputs) {
-        ExprNodeDesc child_nd = (ExprNodeDesc) child;
-        if (child_nd instanceof ExprNodeConstantDesc
-            && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
-          unknown = true;
-        }
-        left_nd = child_nd;
-      }
-
-      assert (idx == 0);
-
-      ExprNodeDesc newnd = null;
-      if (unknown) {
-        newnd = new ExprNodeConstantDesc(fnd.getTypeInfo(), null);
-      } else {
-        newnd = new ExprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd
-            .getFieldName(), fnd.getIsList());
-      }
-      return newnd;
-    }
-
-  }
-
-  /**
-   * Processor for constants and null expressions. For such expressions the
-   * processor simply clones the exprNodeDesc and returns it.
-   */
-  public static class DefaultExprProcessor implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-      if (nd instanceof ExprNodeConstantDesc) {
-        return ((ExprNodeConstantDesc) nd).clone();
-      } else if (nd instanceof ExprNodeNullDesc) {
-        return ((ExprNodeNullDesc) nd).clone();
-      }
-
-      assert (false);
-      return null;
-    }
-  }
-
-  public static NodeProcessor getDefaultExprProcessor() {
-    return new DefaultExprProcessor();
-  }
-
-  public static NodeProcessor getGenericFuncProcessor() {
-    return new GenericFuncExprProcessor();
-  }
-
-  public static NodeProcessor getFieldProcessor() {
-    return new FieldExprProcessor();
-  }
-
-  public static NodeProcessor getColumnProcessor() {
-    return new ColumnExprProcessor();
-  }
-
-  /**
    * Generates the partition pruner for the expression tree.
-   * 
+   *
    * @param tabAlias
    *          The table alias of the partition table that is being considered
    *          for pruning
@@ -237,30 +56,9 @@
     // Create the walker, the rules dispatcher and the context.
     ExprProcCtx pprCtx = new ExprProcCtx(tabAlias);
 
-    // create a walker which walks the tree in a DFS manner while maintaining
-    // the operator stack. The dispatcher
-    // generates the plan from the operator tree
-    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
-    exprRules.put(
-        new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
-        getColumnProcessor());
-    exprRules.put(
-        new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
-        getFieldProcessor());
-    exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName()
-        + "%"), getGenericFuncProcessor());
+    HashMap<Node, Object> outputMap = PrunerUtils.walkExprTree(pred, pprCtx, getColumnProcessor(),
+        getFieldProcessor(), getGenericFuncProcessor(), getDefaultExprProcessor());
 
-    // The dispatcher fires the processor corresponding to the closest matching
-    // rule and passes the context along
-    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
-        exprRules, pprCtx);
-    GraphWalker egw = new DefaultGraphWalker(disp);
-
-    List<Node> startNodes = new ArrayList<Node>();
-    startNodes.add(pred);
-
-    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
-    egw.startWalking(startNodes, outputMap);
     hasNonPartCols = pprCtx.getHasNonPartCols();
 
     // Get the exprNodeDesc corresponding to the first start node;
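The folding logic removed here (and moved into PrunerExpressionOperatorFactory) rests on three-valued logic: AND/OR/NOT can be evaluated over UNKNOWN (null) children, while any other deterministic function of a null child folds to null. A self-contained sketch of that reasoning, in plain Java rather than Hive code, assuming the usual pruning rule that a partition is kept unless the folded predicate is definitely false:

// Boolean is used as a three-valued type: TRUE, FALSE, and null for UNKNOWN.
public final class ThreeValuedLogicDemo {

  static Boolean and(Boolean a, Boolean b) {
    if (Boolean.FALSE.equals(a) || Boolean.FALSE.equals(b)) {
      return Boolean.FALSE; // FALSE wins even if the other side is UNKNOWN
    }
    return (a == null || b == null) ? null : Boolean.TRUE;
  }

  static Boolean or(Boolean a, Boolean b) {
    if (Boolean.TRUE.equals(a) || Boolean.TRUE.equals(b)) {
      return Boolean.TRUE; // TRUE wins even if the other side is UNKNOWN
    }
    return (a == null || b == null) ? null : Boolean.FALSE;
  }

  public static void main(String[] args) {
    // For "ds = '2012-01-01' and value > 10": ds is a partition column and is
    // evaluable per partition; value is not, so the pruner folds "value > 10"
    // to UNKNOWN (null). The partition survives unless the whole predicate is
    // definitely FALSE.
    System.out.println(and(Boolean.TRUE, null));  // null  -> keep partition
    System.out.println(and(Boolean.FALSE, null)); // FALSE -> prune partition
    System.out.println(or(Boolean.TRUE, null));   // TRUE  -> keep partition
  }
}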
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (revision 1391606)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (working copy)
@@ -18,17 +18,13 @@
 
 package org.apache.hadoop.hive.ql.optimizer.ppr;
 
-import java.util.Map;
-import java.util.Stack;
-
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 
 /**
@@ -38,49 +34,18 @@
  * that table alias and store a mapping from the table scan operator to that
  * pruner. We call that pruner later during plan generation.
 */
-public final class OpProcFactory {
+public final class OpProcFactory extends PrunerOperatorFactory {
 
   /**
    * Determines the partition pruner for the filter. This is called only when
    * the filter follows a table scan operator.
    */
-  public static class FilterPPR implements NodeProcessor {
+  public static class FilterPPR extends Filter {
 
     @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
+    protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
+        TableScanOperator top) throws SemanticException, UDFArgumentException {
       OpWalkerCtx owc = (OpWalkerCtx) procCtx;
-      FilterOperator fop = (FilterOperator) nd;
-      FilterOperator fop2 = null;
-
-      // The stack contains either ... TS, Filter or
-      // ... TS, Filter, Filter with the head of the stack being the rightmost
-      // symbol. So we just pop out the two elements from the top and if the
-      // second one of them is not a table scan then the operator on the top of
-      // the stack is the Table scan operator.
-      Node tmp = stack.pop();
-      Node tmp2 = stack.pop();
-      TableScanOperator top = null;
-      if (tmp2 instanceof TableScanOperator) {
-        top = (TableScanOperator) tmp2;
-      } else {
-        top = (TableScanOperator) stack.peek();
-        fop2 = (FilterOperator) tmp2;
-      }
-      stack.push(tmp2);
-      stack.push(tmp);
-
-      // If fop2 exists (i.e this is not the top level filter and fop2 is not
-      // a sampling filter then we ignore the current filter
-      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
-        return null;
-      }
-
-      // ignore the predicate in case it is not a sampling predicate
-      if (fop.getConf().getIsSamplingPred()) {
-        return null;
-      }
-
-      // Otherwise this is not a sampling predicate and we need to
       ExprNodeDesc predicate = fop.getConf().getPredicate();
       String alias = top.getConf().getAlias();
@@ -93,50 +58,14 @@
 
       // Add the pruning predicate to the table scan operator
       addPruningPred(owc.getOpToPartPruner(), top, ppr_pred);
-
-      return null;
     }
 
-    private void addPruningPred(Map<TableScanOperator, ExprNodeDesc> opToPPR,
-        TableScanOperator top, ExprNodeDesc new_ppr_pred) throws UDFArgumentException {
-      ExprNodeDesc old_ppr_pred = opToPPR.get(top);
-      ExprNodeDesc ppr_pred = null;
-      if (old_ppr_pred != null) {
-        // or the old_ppr_pred and the new_ppr_pred
-        ppr_pred = TypeCheckProcFactory.DefaultExprProcessor
-            .getFuncExprNodeDesc("OR", old_ppr_pred, new_ppr_pred);
-      } else {
-        ppr_pred = new_ppr_pred;
-      }
-
-      // Put the mapping from table scan operator to ppr_pred
-      opToPPR.put(top, ppr_pred);
-
-      return;
-    }
   }
 
-  /**
-   * Default processor which just merges its children.
-   */
-  public static class DefaultPPR implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-      // Nothing needs to be done.
-      return null;
-    }
-  }
-
   public static NodeProcessor getFilterProc() {
     return new FilterPPR();
   }
 
-  public static NodeProcessor getDefaultProc() {
-    return new DefaultPPR();
-  }
-
   private OpProcFactory() {
     // prevent instantiation
   }
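FilterPPR now keeps only the ppr-specific predicate generation; the stack walk and the OR-merging both come from the base class. A sketch of the inherited addPruningPred contract (the demo class is hypothetical; predA and predB stand for pruning predicates already generated for two filter branches over the same table scan):

package org.apache.hadoop.hive.ql.optimizer;

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

/**
 * Hypothetical illustration: when two qualifying filters feed the same
 * TableScanOperator, the second predicate is OR-ed with the first instead
 * of replacing it.
 */
public class AddPruningPredDemo extends PrunerOperatorFactory.Filter {

  void demo(TableScanOperator top, ExprNodeDesc predA, ExprNodeDesc predB)
      throws UDFArgumentException {
    Map<TableScanOperator, ExprNodeDesc> opToPruner =
        new LinkedHashMap<TableScanOperator, ExprNodeDesc>();
    addPruningPred(opToPruner, top, predA); // maps top -> predA
    addPruningPred(opToPruner, top, predB); // maps top -> OR(predA, predB)
    // A partition must be scanned if either branch may select rows from it,
    // so the union (OR) is the only safe way to combine the branches.
  }
}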
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java (revision 0)
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
+/**
+ * General-purpose utility functions shared by the pruners.
+ */
+public final class PrunerUtils {
+  private static Log LOG;
+
+  static {
+    LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.optimizer.PrunerUtils");
+  }
+
+  private PrunerUtils() {
+    // prevent instantiation
+  }
+
+  /**
+   * Walk the operator tree for pruner generation.
+   *
+   * @param pctx
+   * @param opWalkerCtx
+   * @param filterProc
+   * @param defaultProc
+   * @throws SemanticException
+   */
+  public static void walkOperatorTree(ParseContext pctx, NodeProcessorCtx opWalkerCtx,
+      NodeProcessor filterProc, NodeProcessor defaultProc)
+      throws SemanticException {
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", "(TS%FIL%)|(TS%FIL%FIL%)"), filterProc);
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along.
+    Dispatcher disp = new DefaultRuleDispatcher(defaultProc, opRules, opWalkerCtx);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of top op nodes.
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+  }
+
+  /**
+   * Walk the expression tree for pruner generation.
+   *
+   * @param pred
+   * @param ctx
+   * @param colProc
+   * @param fieldProc
+   * @param genFuncProc
+   * @param defProc
+   * @return the map from walked expression nodes to their processor outputs
+   * @throws SemanticException
+   */
+  public static HashMap<Node, Object> walkExprTree(ExprNodeDesc pred, NodeProcessorCtx ctx,
+      NodeProcessor colProc, NodeProcessor fieldProc, NodeProcessor genFuncProc,
+      NodeProcessor defProc)
+      throws SemanticException {
+    // Create a walker which walks the tree in a DFS manner while maintaining
+    // the expression stack. The dispatcher generates the output expression
+    // for each visited node.
+    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+    exprRules.put(
+        new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
+        colProc);
+    exprRules.put(
+        new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
+        fieldProc);
+    exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName()
+        + "%"), genFuncProc);
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along.
+    Dispatcher disp = new DefaultRuleDispatcher(defProc, exprRules, ctx);
+    GraphWalker egw = new DefaultGraphWalker(disp);
+
+    List<Node> startNodes = new ArrayList<Node>();
+    startNodes.add(pred);
+
+    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+    egw.startWalking(startNodes, outputMap);
+    return outputMap;
+  }
+
+}
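Putting the pieces together, a new pruner's Transform reduces to a few lines. MyPruner is hypothetical and simply mirrors PartitionPruner.transform above, reusing the real OpWalkerCtx and OpProcFactory from the ppr package:

package org.apache.hadoop.hive.ql.optimizer;

import org.apache.hadoop.hive.ql.optimizer.ppr.OpProcFactory;
import org.apache.hadoop.hive.ql.optimizer.ppr.OpWalkerCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

/**
 * Hypothetical Transform: all the rule wiring and graph walking now lives in
 * PrunerUtils, so a pruner only supplies its walker context and processors.
 */
public class MyPruner implements Transform {

  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    // Create the context for walking operators (same call as in
    // PartitionPruner.transform earlier in this patch).
    OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner());
    PrunerUtils.walkOperatorTree(pctx, opWalkerCtx,
        OpProcFactory.getFilterProc(), OpProcFactory.getDefaultProc());
    return pctx;
  }
}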