Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (revision 785040) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (working copy) @@ -155,7 +155,7 @@ private filterDesc getTestFilterDesc(String column) { ArrayList children1 = new ArrayList(); - children1.add(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, column)); + children1.add(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, column, "", false)); exprNodeDesc lhs = new exprNodeFuncDesc( TypeInfoFactory.doubleTypeInfo, FunctionRegistry.getUDFClass(Constants.DOUBLE_TYPE_NAME), @@ -322,7 +322,7 @@ new exprNodeFieldDesc(TypeInfoFactory.stringTypeInfo, new exprNodeColumnDesc(TypeInfoFactory.getListTypeInfo( TypeInfoFactory.stringTypeInfo), - Utilities.ReduceField.VALUE.toString()), + Utilities.ReduceField.VALUE.toString(), "", false), "0", false)), outputColumns), op4); @@ -371,7 +371,7 @@ } public static exprNodeColumnDesc getStringColumn(String columnName) { - return new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, columnName); + return new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, columnName, "", false); } @SuppressWarnings("unchecked") Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java (revision 785040) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java (working copy) @@ -97,7 +97,7 @@ public void testExprNodeColumnEvaluator() throws Throwable { try { // get a evaluator for a simple field expression - exprNodeDesc exprDesc = new exprNodeColumnDesc(colaType, "cola"); + exprNodeDesc exprDesc = new exprNodeColumnDesc(colaType, "cola", "", false); ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(exprDesc); // evaluate on row @@ -116,8 +116,8 @@ public void testExprNodeFuncEvaluator() throws Throwable { try { // get a evaluator for a string concatenation expression - exprNodeDesc col1desc = new exprNodeColumnDesc(col1Type, "col1"); - exprNodeDesc coladesc = new exprNodeColumnDesc(colaType, "cola"); + exprNodeDesc col1desc = new exprNodeColumnDesc(col1Type, "col1", "", false); + exprNodeDesc coladesc = new exprNodeColumnDesc(colaType, "cola", "", false); exprNodeDesc col11desc = new exprNodeIndexDesc(col1desc, new exprNodeConstantDesc(new Integer(1))); exprNodeDesc cola0desc = new exprNodeIndexDesc(coladesc, new exprNodeConstantDesc(new Integer(0))); exprNodeDesc func1 = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", col11desc, cola0desc); @@ -138,7 +138,7 @@ public void testExprNodeConversionEvaluator() throws Throwable { try { // get a evaluator for a string concatenation expression - exprNodeDesc col1desc = new exprNodeColumnDesc(col1Type, "col1"); + exprNodeDesc col1desc = new exprNodeColumnDesc(col1Type, "col1", "", false); exprNodeDesc col11desc = new exprNodeIndexDesc(col1desc, new exprNodeConstantDesc(new Integer(1))); exprNodeDesc func1 = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc(Constants.DOUBLE_TYPE_NAME, col11desc); ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(func1); @@ -241,8 +241,8 @@ basetimes * 10, ExprNodeEvaluatorFactory.get( TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", - new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1"), constant1), - 
new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola"), constant1))), + new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1", "", false), constant1), + new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola", "", false), constant1))), r, "1b"); measureSpeed("concat(concat(col1[1], cola[1]), col1[2])", @@ -250,9 +250,9 @@ ExprNodeEvaluatorFactory.get( TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", - new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1"), constant1), - new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola"), constant1)), - new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1"), constant2))), + new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1", "", false), constant1), + new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola", "", false), constant1)), + new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1", "", false), constant2))), r, "1b2"); measureSpeed("concat(concat(concat(col1[1], cola[1]), col1[2]), cola[2])", @@ -261,10 +261,10 @@ TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", - new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1"), constant1), - new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola"), constant1)), - new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1"), constant2)), - new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola"), constant2))), + new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1", "", false), constant1), + new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola", "", false), constant1)), + new exprNodeIndexDesc(new exprNodeColumnDesc(col1Type, "col1", "", false), constant2)), + new exprNodeIndexDesc(new exprNodeColumnDesc(colaType, "cola", "", false), constant2))), r, "1b2c"); Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java (revision 785040) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java (working copy) @@ -43,8 +43,8 @@ try { // initialize a complete map reduce configuration - exprNodeDesc expr1 = new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F1); - exprNodeDesc expr2 = new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F2); + exprNodeDesc expr1 = new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F1, "", false); + exprNodeDesc expr2 = new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F2, "", false); exprNodeDesc filterExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", expr1, expr2); filterDesc filterCtx = new filterDesc(filterExpr); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java (revision 0) @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.ppr; + +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; + +/** + * The processor context for partition pruner. This contains the table + * alias that is being currently processed. + */ +public class ExprProcCtx implements NodeProcessorCtx { + + /** + * The table alias that is being currently processed. + */ + String tabAlias; + + public ExprProcCtx(String tabAlias) { + this.tabAlias = tabAlias; + } + + public String getTabAlias() { + return tabAlias; + } + + public void setTabAlias(String tabAlias) { + this.tabAlias = tabAlias; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java (revision 0) @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.ppr; + +import java.util.HashMap; + +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.plan.exprNodeDesc; + +/** + * Context class for operator tree walker for partition pruner. + */ +public class OpWalkerCtx implements NodeProcessorCtx { + + /** + * Map from tablescan operator to partition pruning predicate + * that is initialized from the ParseContext + */ + private HashMap opToPartPruner; + + /** + * Constructor + */ + public OpWalkerCtx(HashMap opToPartPruner) { + this.opToPartPruner = opToPartPruner; + } + + public HashMap getOpToPartPruner() { + return this.opToPartPruner; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 0) @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.ppr; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.Transform; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * The transformation step that does partition pruning. + * + */ +public class PartitionPruner implements Transform { + + /* (non-Javadoc) + * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext) + */ + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + + // create a the context for walking operators + OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner()); + + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", "TS%FIL%"), OpProcFactory.getFilterProc()); + + // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, opWalkerCtx); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // Create a list of topop nodes + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return pctx; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprPrunerInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprPrunerInfo.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprPrunerInfo.java (revision 0) @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.ppr; + +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; + +/** + * The processor context for partition pruner. This contains the table + * alias that is being currently processed. + */ +public class ExprPrunerInfo implements NodeProcessorCtx { + + /** + * The table alias that is being currently processed. + */ + String tabAlias; + + public String getTabAlias() { + return tabAlias; + } + + public void setTabAlias(String tabAlias) { + this.tabAlias = tabAlias; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (revision 0) @@ -0,0 +1,314 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.ppr; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeFuncDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeIndexDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeNullDesc; +import org.apache.hadoop.hive.ql.udf.UDFOPAnd; +import org.apache.hadoop.hive.ql.udf.UDFOPOr; +import org.apache.hadoop.hive.ql.udf.UDFOPNot; +import org.apache.hadoop.hive.ql.udf.UDFType; + +/** + * Expression processor factory for partition pruning. Each processor tries + * to convert the expression subtree into a partition pruning expression. + * This expression is then used to figure out whether a particular partition + * should be scanned or not. + */ +public class ExprProcFactory { + + /** + * Processor for column expressions. 
+   */
+  public static class ColumnExprProcessor implements NodeProcessor {
+
+    /**
+     * Clones the column reference if it is a partition column of the table
+     * alias being processed; otherwise replaces it with a null constant.
+     */
+    @Override
+    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      exprNodeDesc newcd = null;
+      exprNodeColumnDesc cd = (exprNodeColumnDesc) nd;
+      ExprProcCtx epc = (ExprProcCtx) procCtx;
+      if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias()) && cd.getIsParititonCol())
+        newcd = cd.clone();
+      else
+        newcd = new exprNodeConstantDesc(cd.getTypeInfo(), null);
+
+      return newcd;
+    }
+
+  }
+
+  /**
+   * Process function descriptors.
+   */
+  public static class FuncExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      exprNodeDesc newfd = null;
+      exprNodeFuncDesc fd = (exprNodeFuncDesc) nd;
+
+      boolean unknown = false;
+      // Check if any of the children is unknown for non-logical operators
+      if (!fd.getUDFMethod().getDeclaringClass().equals(UDFOPAnd.class)
+          && !fd.getUDFMethod().getDeclaringClass().equals(UDFOPOr.class)
+          && !fd.getUDFMethod().getDeclaringClass().equals(UDFOPNot.class))
+        for(Object child: nodeOutputs) {
+          exprNodeDesc child_nd = (exprNodeDesc)child;
+          if (child_nd instanceof exprNodeConstantDesc &&
+              ((exprNodeConstantDesc)child_nd).getValue() == null) {
+            unknown = true;
+          }
+        }
+
+      if (fd.getUDFClass().getAnnotation(UDFType.class) != null &&
+          (fd.getUDFClass().getAnnotation(UDFType.class).deterministic() == false ||
+           unknown))
+        newfd = new exprNodeConstantDesc(fd.getTypeInfo(), null);
+      else {
+        // Create the list of children
+        ArrayList children = new ArrayList();
+        for(Object child: nodeOutputs) {
+          children.add((exprNodeDesc) child);
+        }
+        // Create a copy of the function descriptor
+        newfd = new exprNodeFuncDesc(fd.getTypeInfo(), fd.getUDFClass(),
+            fd.getUDFMethod(), children);
+      }
+
+      return newfd;
+    }
+
+  }
+
+  /**
+   * Processor for generic function descriptors. If any child is unknown the
+   * expression is replaced by a null constant; otherwise the descriptor is
+   * rebuilt over the converted children.
+   */
+  public static class GenericFuncExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      exprNodeDesc newfd = null;
+      exprNodeGenericFuncDesc fd = (exprNodeGenericFuncDesc) nd;
+
+      boolean unknown = false;
+      // Check if any of the children is unknown
+      for(Object child: nodeOutputs) {
+        exprNodeDesc child_nd = (exprNodeDesc)child;
+        if (child_nd instanceof exprNodeConstantDesc &&
+            ((exprNodeConstantDesc)child_nd).getValue() == null) {
+          unknown = true;
+        }
+      }
+
+      if (unknown)
+        newfd = new exprNodeConstantDesc(fd.getTypeInfo(), null);
+      else {
+        // Create the list of children
+        ArrayList children = new ArrayList();
+        for(Object child: nodeOutputs) {
+          children.add((exprNodeDesc) child);
+        }
+        // Create a copy of the function descriptor
+        newfd = new exprNodeGenericFuncDesc(fd.getTypeInfo(), fd.getGenericUDFClass(), children);
+      }
+
+      return newfd;
+    }
+
+  }
+
+  public static class IndexExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+        Object...
nodeOutputs) throws SemanticException { + + exprNodeIndexDesc ind = (exprNodeIndexDesc)nd; + boolean unknown = false; + int idx = 0; + exprNodeDesc list_nd = null; + exprNodeDesc idx_nd = null; + for(Object child: nodeOutputs) { + exprNodeDesc child_nd = (exprNodeDesc) child; + if (child_nd instanceof exprNodeConstantDesc && + ((exprNodeConstantDesc)child_nd).getValue() == null) + unknown = true; + if (idx == 0) + list_nd = child_nd; + if (idx == 1) + idx_nd = child_nd; + idx++; + } + + exprNodeDesc newnd = null; + if (unknown) { + newnd = new exprNodeConstantDesc(ind.getTypeInfo(), null); + } + else { + newnd = new exprNodeIndexDesc(ind.getTypeInfo(), list_nd, idx_nd); + } + return newnd; + } + + } + + public static class FieldExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + exprNodeFieldDesc fnd = (exprNodeFieldDesc)nd; + boolean unknown = false; + int idx = 0; + exprNodeDesc left_nd = null; + for(Object child: nodeOutputs) { + exprNodeDesc child_nd = (exprNodeDesc) child; + if (child_nd instanceof exprNodeConstantDesc && + ((exprNodeConstantDesc)child_nd).getValue() == null) + unknown = true; + left_nd = child_nd; + } + + assert(idx == 0); + + exprNodeDesc newnd = null; + if (unknown) { + newnd = new exprNodeConstantDesc(fnd.getTypeInfo(), null); + } + else { + newnd = new exprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd.getFieldName(), fnd.getIsList()); + } + return newnd; + } + + } + + /** + * Processor for constants and null expressions. For such expressions + * the processor simply clones the exprNodeDesc and returns it. + */ + public static class DefaultExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + if (nd instanceof exprNodeConstantDesc) + return ((exprNodeConstantDesc)nd).clone(); + else if (nd instanceof exprNodeNullDesc) + return ((exprNodeNullDesc)nd).clone(); + + assert(false); + return null; + } + } + + public static NodeProcessor getDefaultExprProcessor() { + return new DefaultExprProcessor(); + } + + public static NodeProcessor getFuncProcessor() { + return new FuncExprProcessor(); + } + + public static NodeProcessor getGenericFuncProcessor() { + return new GenericFuncExprProcessor(); + } + + public static NodeProcessor getIndexProcessor() { + return new IndexExprProcessor(); + } + + public static NodeProcessor getFieldProcessor() { + return new FieldExprProcessor(); + } + + public static NodeProcessor getColumnProcessor() { + return new ColumnExprProcessor(); + } + + /** + * Extracts pushdown predicates from the given list of predicate expression + * @param opContext operator context used for resolving column references + * @param op operator of the predicates being processed + * @param preds + * @return The expression walker information + * @throws SemanticException + */ + public static exprNodeDesc genPruner(String tabAlias, exprNodeDesc pred) throws SemanticException { + // Create the walker, the rules dispatcher and the context. + ExprProcCtx pprCtx= new ExprProcCtx(tabAlias); + + // create a walker which walks the tree in a DFS manner while maintaining the operator stack. 
The dispatcher
+    // generates the plan from the operator tree
+    Map exprRules = new LinkedHashMap();
+    exprRules.put(new RuleRegExp("R1", exprNodeColumnDesc.class.getName() + "%"), getColumnProcessor());
+    exprRules.put(new RuleRegExp("R2", exprNodeFieldDesc.class.getName() + "%"), getFieldProcessor());
+    exprRules.put(new RuleRegExp("R3", exprNodeFuncDesc.class.getName() + "%"), getFuncProcessor());
+    exprRules.put(new RuleRegExp("R4", exprNodeIndexDesc.class.getName() + "%"), getIndexProcessor());
+    exprRules.put(new RuleRegExp("R5", exprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor());
+
+    // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, pprCtx);
+    GraphWalker egw = new DefaultGraphWalker(disp);
+
+    List startNodes = new ArrayList();
+    startNodes.add(pred);
+
+    HashMap outputMap = new HashMap();
+    egw.startWalking(startNodes, outputMap);
+
+    // Get the exprNodeDesc corresponding to the first start node.
+    return (exprNodeDesc)outputMap.get(pred);
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (revision 0)
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.ppr;
+
+import java.util.Stack;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
+
+/**
+ * Operator factory for partition pruning processing of the operator graph.
+ * We find all the filter operators that appear just beneath the table scan
+ * operators. We then pass the filter to the partition pruner to construct
+ * a pruner for that table alias and store a mapping from the table scan
+ * operator to that pruner. We call that pruner later during plan generation.
+ */
+public class OpProcFactory {
+
+  /**
+   * Determines the partition pruner for the filter. This is called only when the filter
+   * follows a table scan operator.
+   */
+  public static class FilterPPR implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+        Object...
nodeOutputs) throws SemanticException { + OpWalkerCtx owc = (OpWalkerCtx)procCtx; + FilterOperator fop = (FilterOperator) nd; + TableScanOperator top = (TableScanOperator) stack.peek(); + exprNodeDesc predicate = fop.getConf().getPredicate(); + String alias = top.getConf().getAlias(); + + // Generate the partition pruning predicate + exprNodeDesc ppr_pred = ExprProcFactory.genPruner(alias, predicate); + + // Create a mapping from the table scan operator to the partition pruning predicate + owc.getOpToPartPruner().put(top, ppr_pred); + + return null; + } + } + + + /** + * Default processor which just merges its children + */ + public static class DefaultPPR implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + // Nothing needs to be done. + return null; + } + } + + public static NodeProcessor getFilterProc() { + return new FilterPPR(); + } + + public static NodeProcessor getDefaultProc() { + return new DefaultPPR(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy) @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.ppd.PredicatePushDown; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor; /** @@ -45,13 +46,14 @@ * @param hiveConf */ public void initialize(HiveConf hiveConf) { - transformations = new ArrayList(); - transformations.add(new ColumnPruner()); - if (hiveConf.getBoolean("hive.optimize.ppd", false)) - transformations.add(new PredicatePushDown()); - transformations.add(new UnionProcessor()); - transformations.add(new MapJoinProcessor()); - } + transformations = new ArrayList(); + transformations.add(new ColumnPruner()); + if (hiveConf.getBoolean("hive.optimize.ppd", false)) + transformations.add(new PredicatePushDown()); + transformations.add(new UnionProcessor()); + transformations.add(new PartitionPruner()); + transformations.add(new MapJoinProcessor()); + } /** * invoke all the transformations one-by-one, and alter the query plan @@ -59,9 +61,9 @@ * @throws SemanticException */ public ParseContext optimize() throws SemanticException { - for (Transform t : transformations) - pctx = t.transform(pctx); - return pctx; + for (Transform t : transformations) + pctx = t.transform(pctx); + return pctx; } /** @@ -77,6 +79,5 @@ public void setPctx(ParseContext pctx) { this.pctx = pctx; } - - + } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -393,11 +393,11 @@ tableDesc tblDesc = null; // Generate the map work for this alias_id - PartitionPruner pruner = parseCtx.getAliasToPruner().get(alias_id); + org.apache.hadoop.hive.ql.parse.PartitionPruner pruner = parseCtx.getAliasToPruner().get(alias_id); Set parts = null; try { // pass both confirmed and unknown partitions through the map-reduce framework - PartitionPruner.PrunedPartitionList partsList = 
pruner.prune(); + org.apache.hadoop.hive.ql.parse.PartitionPruner.PrunedPartitionList partsList = pruner.prune(); parts = partsList.getConfirmedPartns(); parts.addAll(partsList.getUnknownPartns()); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -166,7 +166,9 @@ { String field = fNamesIter.next(); ColumnInfo valueInfo = inputRS.get(key, field); - values.add(new exprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName())); + values.add(new exprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(), + valueInfo.getTabAlias(), + valueInfo.getIsPartitionCol())); ColumnInfo oldValueInfo = oldOutputRS.get(key, field); String col = field; if(oldValueInfo != null) @@ -174,7 +176,7 @@ if (outputRS.get(key, col) == null) { outputColumnNames.add(col); outputRS.put(key, col, new ColumnInfo(col, - valueInfo.getType())); + valueInfo.getType(), valueInfo.getTabAlias(), valueInfo.getIsPartitionCol())); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (working copy) @@ -37,25 +37,34 @@ private String internalName; /** - * isVirtual indicates whether the column is a virtual column or not. Virtual columns - * are the ones that are not stored in the tables. For now these are just the partitioning - * columns. + * Store the alias of the table where available. */ - private boolean isVirtual; + private String tabAlias; + /** + * Indicates whether the column is a partition column. + */ + private boolean isPartitionCol; + transient private TypeInfo type; public ColumnInfo() { } - public ColumnInfo(String internalName, TypeInfo type) { + public ColumnInfo(String internalName, TypeInfo type, + String tabAlias, boolean isPartitionCol) { this.internalName = internalName; this.type = type; + this.tabAlias = tabAlias; + this.isPartitionCol = isPartitionCol; } - public ColumnInfo(String internalName, Class type) { + public ColumnInfo(String internalName, Class type, + String tabAlias, boolean isPartitionCol) { this.internalName = internalName; this.type = TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(type); + this.tabAlias = tabAlias; + this.isPartitionCol = isPartitionCol; } public TypeInfo getType() { @@ -74,6 +83,13 @@ this.internalName = internalName; } + public String getTabAlias() { + return this.tabAlias; + } + + public boolean getIsPartitionCol() { + return this.isPartitionCol; + } /** * Returns the string representation of the ColumnInfo. */ Index: ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java (working copy) @@ -27,16 +27,36 @@ public class exprNodeColumnDesc extends exprNodeDesc implements Serializable { private static final long serialVersionUID = 1L; + + /** + * The column name. + */ private String column; + + /** + * The alias of the table. + */ + private String tabAlias; + /** + * Is the column a partitioned column. 
+ */ + private boolean isPartitionCol; + public exprNodeColumnDesc() {} - public exprNodeColumnDesc(TypeInfo typeInfo, String column) { + public exprNodeColumnDesc(TypeInfo typeInfo, String column, + String tabAlias, boolean isPartitionCol) { super(typeInfo); this.column = column; + this.tabAlias = tabAlias; + this.isPartitionCol = isPartitionCol; } - public exprNodeColumnDesc(Class c, String column) { + public exprNodeColumnDesc(Class c, String column, String tabAlias, + boolean isPartitionCol) { super(TypeInfoFactory.getPrimitiveTypeInfoFromJavaPrimitive(c)); this.column = column; + this.tabAlias = tabAlias; + this.isPartitionCol = isPartitionCol; } public String getColumn() { return this.column; @@ -45,6 +65,20 @@ this.column = column; } + public String getTabAlias() { + return this.tabAlias; + } + public void setTabAlias(String tabAlias) { + this.tabAlias = tabAlias; + } + + public boolean getIsParititonCol() { + return this.isPartitionCol; + } + public void setIsPartitionCol(boolean isPartitionCol) { + this.isPartitionCol = isPartitionCol; + } + public String toString() { return "Column[" + column + "]"; } @@ -62,7 +96,8 @@ } @Override public exprNodeDesc clone() { - return new exprNodeColumnDesc(this.typeInfo, this.column); + return new exprNodeColumnDesc(this.typeInfo, this.column, + this.tabAlias, this.isPartitionCol); } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/tableScanDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/tableScanDesc.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/tableScanDesc.java (working copy) @@ -28,8 +28,22 @@ @explain(displayName="TableScan") public class tableScanDesc implements Serializable { private static final long serialVersionUID = 1L; + + private String alias; + @SuppressWarnings("nls") - public tableScanDesc() { - throw new RuntimeException("This class does not need to be instantiated"); + public tableScanDesc() { } + + public tableScanDesc(final String alias) { + this.alias = alias; } + + @explain(displayName="alias") + public String getAlias() { + return alias; + } + + public void setAlias(String alias) { + this.alias = alias; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java (working copy) @@ -285,7 +285,8 @@ if (t.isPartitionKey(colName)) { // Set value to null if it's not partition column if (tabAlias.equalsIgnoreCase(tableAlias)) { - desc = new ExprNodeTempDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, colName)); + desc = new ExprNodeTempDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + colName, tabAlias, true)); } else { desc = new ExprNodeTempDesc(new exprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null)); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (working copy) @@ -92,7 +92,8 @@ // If the current subExpression is pre-calculated, as in Group-By etc. 
ColumnInfo colInfo = input.get("", expr.toStringTree()); if (colInfo != null) { - desc = new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName()); + desc = new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol()); return desc; } return desc; @@ -325,7 +326,8 @@ } } else { // It's a column. - return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName()); + return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol()); } } @@ -622,7 +624,8 @@ ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1))); return null; } - return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName()); + return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol()); } // Return nulls for conversion operators Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -137,6 +137,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { private HashMap aliasToPruner; + private HashMap opToPartPruner; private HashMap aliasToSamplePruner; private HashMap> topOps; private HashMap> topSelOps; @@ -203,6 +204,7 @@ public void init(ParseContext pctx) { aliasToPruner = pctx.getAliasToPruner(); + opToPartPruner = pctx.getOpToPartPruner(); aliasToSamplePruner = pctx.getAliasToSamplePruner(); topOps = pctx.getTopOps(); topSelOps = pctx.getTopSelOps(); @@ -218,7 +220,7 @@ } public ParseContext getParseContext() { - return new ParseContext(conf, qb, ast, aliasToPruner, aliasToSamplePruner, topOps, + return new ParseContext(conf, qb, ast, aliasToPruner, opToPartPruner, aliasToSamplePruner, topOps, topSelOps, opParseCtx, joinContext, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx); } @@ -1010,7 +1012,7 @@ private Integer genColListRegex(String colRegex, String tabAlias, String alias, ASTNode sel, ArrayList col_list, RowResolver input, Integer pos, RowResolver output) throws SemanticException { - + // The table alias should exist if (tabAlias != null && !input.hasTableAlias(tabAlias)) throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel)); @@ -1041,9 +1043,13 @@ continue; } - exprNodeColumnDesc expr = new exprNodeColumnDesc(colInfo.getType(), name); + exprNodeColumnDesc expr = new exprNodeColumnDesc(colInfo.getType(), name, + colInfo.getTabAlias(), + colInfo.getIsPartitionCol()); col_list.add(expr); - output.put(tmp[0], tmp[1], new ColumnInfo(getColumnInternalName(pos), colInfo.getType())); + output.put(tmp[0], tmp[1], + new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol())); pos = Integer.valueOf(pos.intValue() + 1); matched ++; } @@ -1116,7 +1122,7 @@ out_rwsch.put( qb.getParseInfo().getAlias(), outputColList.get(i), - new ColumnInfo(outputColList.get(i), TypeInfoFactory.stringTypeInfo) // Script output is always a string + new ColumnInfo(outputColList.get(i), TypeInfoFactory.stringTypeInfo, null, false) // Script output is always a string ); } @@ -1313,7 +1319,7 @@ } out_rwsch.put(tabAlias, colAlias, new ColumnInfo(getColumnInternalName(pos), - exp.getTypeInfo())); + exp.getTypeInfo(), tabAlias, false)); pos = 
Integer.valueOf(pos.intValue() + 1); } } @@ -1445,11 +1451,12 @@ throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); } - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), exprInfo.getInternalName())); + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), + exprInfo.getInternalName(), "", false)); String field = getColumnInternalName(i); outputColumnNames.add(field); groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType())); + new ColumnInfo(field, exprInfo.getType(), null, false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } // For each aggregation @@ -1473,7 +1480,10 @@ String paraExpression = paraExprInfo.getInternalName(); assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExprInfo.getInternalName())); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), + paraExprInfo.getInternalName(), + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); } UDAFInfo udaf = getUDAFInfo(aggName, mode, aggParameters, value); @@ -1486,7 +1496,7 @@ outputColumnNames.add(field); groupByOutputRowResolver.put("",value.toStringTree(), new ColumnInfo(field, - udaf.retType)); + udaf.retType, "", false)); } Operator op = @@ -1530,11 +1540,14 @@ throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); } - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), exprInfo.getInternalName())); + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), + exprInfo.getInternalName(), + exprInfo.getTabAlias(), + exprInfo.getIsPartitionCol())); String field = getColumnInternalName(i); outputColumnNames.add(field); groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType())); + new ColumnInfo(field, exprInfo.getType(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } @@ -1559,7 +1572,10 @@ String paraExpression = paraExprInfo.getInternalName(); assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExprInfo.getInternalName())); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), + paraExprInfo.getInternalName(), + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); } } @@ -1571,7 +1587,9 @@ } String paraExpression = paraExprInfo.getInternalName(); assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression)); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); } UDAFInfo udaf = getUDAFInfo(aggName, mode, aggParameters, value); @@ -1583,7 +1601,7 @@ outputColumnNames.add(field); groupByOutputRowResolver.put("", value.toStringTree(), new ColumnInfo(field, - udaf.retType)); + udaf.retType, "", false)); } Operator op = putOpInsertMap( @@ -1625,7 +1643,7 @@ String field = getColumnInternalName(i); outputColumnNames.add(field); groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, grpByExprNode.getTypeInfo())); + new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } @@ -1643,7 +1661,7 @@ numDistn++; String field = getColumnInternalName(grpByExprs.size() + numDistn -1); outputColumnNames.add(field); - groupByOutputRowResolver.put("", text, new ColumnInfo(field, distExprNode.getTypeInfo())); + groupByOutputRowResolver.put("", text, 
new ColumnInfo(field, distExprNode.getTypeInfo(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } } @@ -1679,7 +1697,7 @@ outputColumnNames.add(field); groupByOutputRowResolver.put("",value.toStringTree(), new ColumnInfo(field, - udaf.retType)); + udaf.retType, "", false)); } Operator op = putOpInsertMap( @@ -1727,7 +1745,7 @@ outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo()); + reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); reduceSinkOutputRowResolver.put("", text, colInfo); colExprMap.put(colInfo.getInternalName(), inputExpr); } else { @@ -1747,7 +1765,7 @@ outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo()); + reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); reduceSinkOutputRowResolver.put("", text, colInfo); colExprMap.put(colInfo.getInternalName(), reduceKeys.get(reduceKeys.size()-1)); } @@ -1771,7 +1789,8 @@ String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); reduceSinkOutputRowResolver.put("", text, new ColumnInfo(field, - reduceValues.get(reduceValues.size()-1).getTypeInfo())); + reduceValues.get(reduceValues.size()-1).getTypeInfo(), + null, false)); } } } @@ -1784,13 +1803,14 @@ for (Map.Entry entry : aggregationTrees.entrySet()) { TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType(); - reduceValues.add(new exprNodeColumnDesc(type, getColumnInternalName(inputField))); + reduceValues.add(new exprNodeColumnDesc(type, getColumnInternalName(inputField), + "", false)); inputField++; outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); reduceSinkOutputRowResolver.put("", ((ASTNode)entry.getValue()).toStringTree(), new ColumnInfo(field, - type)); + type, null, false)); } } @@ -1835,10 +1855,10 @@ String field = getColumnInternalName(i); outputColumnNames.add(field); TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", grpbyExpr.toStringTree()).getType(); - exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(typeInfo, field); + exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(typeInfo, field, "", false); reduceKeys.add(inputExpr); ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + field, - typeInfo); + typeInfo, "", false); reduceSinkOutputRowResolver2.put("", grpbyExpr.toStringTree(), colInfo); colExprMap.put(colInfo.getInternalName(), inputExpr); @@ -1852,13 +1872,13 @@ String field = getColumnInternalName(inputField); ASTNode t = entry.getValue(); TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", t.toStringTree()).getType(); - reduceValues.add(new exprNodeColumnDesc(typeInfo, field)); + reduceValues.add(new exprNodeColumnDesc(typeInfo, field, "", false)); inputField++; String col = getColumnInternalName(reduceValues.size()-1); outputColumnNames.add(col); reduceSinkOutputRowResolver2.put("", t.toStringTree(), new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." 
+ col, - typeInfo)); + typeInfo, "", false)); } ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( @@ -1905,11 +1925,13 @@ } String expression = exprInfo.getInternalName(); - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), expression)); + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), expression, + exprInfo.getTabAlias(), + exprInfo.getIsPartitionCol())); String field = getColumnInternalName(i); outputColumnNames.add(field); groupByOutputRowResolver2.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType())); + new ColumnInfo(field, exprInfo.getType(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } HashMap aggregationTrees = parseInfo @@ -1924,7 +1946,9 @@ } String paraExpression = paraExprInfo.getInternalName(); assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression)); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); String aggName = value.getChild(0).getText(); Class aggClass = FunctionRegistry.getUDAF(aggName); @@ -1939,7 +1963,7 @@ outputColumnNames.add(field); groupByOutputRowResolver2.put("", value.toStringTree(), new ColumnInfo(field, - udaf.retType)); + udaf.retType, "", false)); } Operator op = putOpInsertMap( @@ -2421,7 +2445,8 @@ ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector(); TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI); TypeInfo rowFieldTypeInfo = rowFields.get(i).getType(); - exprNodeDesc column = new exprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(i).getInternalName()); + exprNodeDesc column = new exprNodeColumnDesc(rowFieldTypeInfo, + rowFields.get(i).getInternalName(), "", false); // LazySimpleSerDe can convert any types to String type using JSON-format. 
if (!tableFieldTypeInfo.equals(rowFieldTypeInfo) && !(isLazySimpleSerDe && tableFieldTypeInfo.getCategory().equals(Category.PRIMITIVE) @@ -2451,7 +2476,7 @@ ArrayList colName = new ArrayList(); for (int i=0; i colExprMap = new HashMap(); ArrayList valueCols = new ArrayList(); for(ColumnInfo colInfo: inputRR.getColumnInfos()) { - valueCols.add(new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName())); + valueCols.add(new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol())); colExprMap.put(colInfo.getInternalName(), valueCols.get(valueCols.size() - 1)); } @@ -2590,13 +2616,15 @@ for(ColumnInfo colInfo: interim_rwsch.getColumnInfos()) { String [] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); out_rwsch.put(info[0], info[1], - new ColumnInfo(getColumnInternalName(pos), colInfo.getType())); + new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), info[0], false)); pos = Integer.valueOf(pos.intValue() + 1); } Operator output = putOpInsertMap( OperatorFactory.getAndMakeChild( - new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, Utilities.ReduceField.VALUE.toString())), + new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + Utilities.ReduceField.VALUE.toString(), + "", false)), new RowSchema(out_rwsch.getColumnInfos()), interim), out_rwsch); @@ -2641,14 +2669,17 @@ { String field = fNamesIter.next(); ColumnInfo valueInfo = inputRS.get(key, field); - keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName())); + keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(), + valueInfo.getInternalName(), + valueInfo.getTabAlias(), + valueInfo.getIsPartitionCol())); if (outputRS.get(key, field) == null) { String colName = getColumnInternalName(outputPos); outputPos++; outputColumnNames.add(colName); colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1)); outputRS.put(key, field, new ColumnInfo(colName, - valueInfo.getType())); + valueInfo.getType(), key, false)); } } } @@ -2696,14 +2727,17 @@ for (Map.Entry entry : fMap.entrySet()) { String field = entry.getKey(); ColumnInfo valueInfo = entry.getValue(); - exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName()); + exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(valueInfo.getType(), + valueInfo.getInternalName(), + valueInfo.getTabAlias(), + valueInfo.getIsPartitionCol()); reduceValues.add(inputExpr); if (outputRS.get(src, field) == null) { String col = getColumnInternalName(reduceValues.size() - 1); outputColumns.add(col); ColumnInfo newColInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." 
+ col, - valueInfo.getType()); + valueInfo.getType(), src, false); colExprMap.put(newColInfo.getInternalName(), inputExpr); outputRS.put(src, field, newColInfo); } @@ -3136,7 +3170,8 @@ ArrayList columnNames = new ArrayList(); for (int i = 0; i < columns.size(); i++) { ColumnInfo col = columns.get(i); - colList.add(new exprNodeColumnDesc(col.getType(), col.getInternalName())); + colList.add(new exprNodeColumnDesc(col.getType(), col.getInternalName(), + col.getTabAlias(), col.getIsPartitionCol())); columnNames.add(col.getInternalName()); } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( @@ -3379,7 +3414,8 @@ for (String col : bucketCols) { ColumnInfo ci = rwsch.get(alias, col); // TODO: change type to the one in the table schema - args.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName())); + args.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName(), + ci.getTabAlias(), ci.getIsPartitionCol())); } } else { @@ -3429,7 +3465,8 @@ for (int i=0; i> iterP = aliasToPruner.entrySet().iterator(); PartitionPruner pr = ((Map.Entry)iterP.next()).getValue(); @@ -3687,6 +3726,7 @@ } } } + } if (fetch != null) { fetchTask = TaskFactory.get(fetch, this.conf); @@ -3840,7 +3880,7 @@ genPlan(qb); - ParseContext pCtx = new ParseContext(conf, qb, ast, aliasToPruner, aliasToSamplePruner, topOps, + ParseContext pCtx = new ParseContext(conf, qb, ast, aliasToPruner, opToPartPruner, aliasToSamplePruner, topOps, topSelOps, opParseCtx, joinContext, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx); Optimizer optm = new Optimizer(); @@ -3887,7 +3927,8 @@ // If the current subExpression is pre-calculated, as in Group-By etc. ColumnInfo colInfo = input.get("", expr.toStringTree()); if (colInfo != null) { - return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName()); + return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol()); } // Create the walker, the rules dispatcher and the context. 
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 785040) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.plan.exprNodeDesc; import org.apache.hadoop.hive.ql.plan.loadFileDesc; import org.apache.hadoop.hive.ql.plan.loadTableDesc; import org.apache.hadoop.hive.ql.Context; @@ -45,6 +47,7 @@ private QB qb; private ASTNode ast; private HashMap aliasToPruner; + private HashMap opToPartPruner; private HashMap aliasToSamplePruner; private HashMap> topOps; private HashMap> topSelOps; @@ -65,6 +68,8 @@ * current parse tree * @param aliasToPruner * partition pruner list + * @param opToPartPruner + * map from table scan operator to partition pruner * @param aliasToSamplePruner * sample pruner list * @param loadFileWork @@ -81,6 +86,7 @@ */ public ParseContext(HiveConf conf, QB qb, ASTNode ast, HashMap aliasToPruner, + HashMap opToPartPruner, HashMap aliasToSamplePruner, HashMap> topOps, HashMap> topSelOps, @@ -92,6 +98,7 @@ this.qb = qb; this.ast = ast; this.aliasToPruner = aliasToPruner; + this.opToPartPruner = opToPartPruner; this.aliasToSamplePruner = aliasToSamplePruner; this.joinContext = joinContext; this.loadFileWork = loadFileWork; @@ -181,6 +188,21 @@ } /** + * @return the opToPartPruner + */ + public HashMap getOpToPartPruner() { + return opToPartPruner; + } + + /** + * @param opToPartPruner + * the opToPartPruner to set + */ + public void setOpToPartPruner(HashMap opToPartPruner) { + this.opToPartPruner = opToPartPruner; + } + + /** * @return the aliasToSamplePruner */ public HashMap getAliasToSamplePruner() {
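For reference, a minimal sketch (not part of the patch) of how the pieces above fit together: the alias now carried on tableScanDesc and the predicate of the FilterOperator beneath the scan are handed to ExprProcFactory.genPruner, which keeps the partition-column sub-expressions for that alias and collapses everything else to null constants. The helper class and the example predicate in the comments are hypothetical; the Hive types and method signatures are the ones added or modified in this patch.

import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.optimizer.ppr.ExprProcFactory;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.exprNodeDesc;

// Hypothetical helper, for illustration only.
public class PartitionPrunerSketch {
  public static exprNodeDesc prunerFor(TableScanOperator top, FilterOperator fop)
      throws SemanticException {
    // The predicate as written in the query, e.g. ds = '2008-04-08' AND key > 100.
    exprNodeDesc predicate = fop.getConf().getPredicate();
    // The table alias is now stored on tableScanDesc instead of being rediscovered later.
    String alias = top.getConf().getAlias();
    // ColumnExprProcessor clones references to partition columns of this alias (ds)
    // and replaces other columns (key) with null constants, so the returned expression
    // depends only on partition columns; FilterPPR stores it in opToPartPruner keyed
    // by the TableScanOperator.
    return ExprProcFactory.genPruner(alias, predicate);
  }
}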