list = tableScan.getNeededColumnIDs();
if (list != null) {
ColumnProjectionUtils.appendReadColumnIDs(jobConf, list);
} else {
ColumnProjectionUtils.setFullyReadColumns(jobConf);
}
+
+ TableScanDesc scanDesc = tableScan.getConf();
+ if (scanDesc != null) {
+ // construct column name list for reference by filter push down
+ Utilities.setColumnNameList(jobConf, tableScan);
+
+ // push down filters
+ ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
+ if (filterExpr != null) {
+ String filterText =
+ filterExpr.getExprString();
+ String filterExprSerialized =
+ Utilities.serializeExpression(filterExpr);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Filter text = " + filterText);
+ LOG.debug("Filter expression = " + filterExprSerialized);
+ }
+ jobConf.set(
+ TableScanDesc.FILTER_TEXT_CONF_STR,
+ filterText);
+ jobConf.set(
+ TableScanDesc.FILTER_EXPR_CONF_STR,
+ filterExprSerialized);
+ }
+ }
}
}
}
Index: ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (revision 995921)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (working copy)
@@ -68,7 +68,7 @@
// clone a jobConf for setting needed columns for reading
JobConf cloneJobConf = new JobConf(job);
- initColumnsNeeded(cloneJobConf, inputFormatClass, hsplit.getPath()
+ pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
.toString(), hsplit.getPath().toUri().getPath());
InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
Index: ql/src/java/org/apache/hadoop/hive/ql/index/IndexSearchCondition.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/index/IndexSearchCondition.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/index/IndexSearchCondition.java (revision 0)
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.index;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * IndexSearchCondition represents an individual search condition
+ * found by {@link IndexPredicateAnalyzer}.
+ *
+ * @author John Sichi
+ * @version $Id:$
+ */
+public class IndexSearchCondition
+{
+ private ExprNodeColumnDesc columnDesc;
+ private String comparisonOp;
+ private ExprNodeConstantDesc constantDesc;
+ private ExprNodeDesc comparisonExpr;
+
+ /**
+ * Constructs a search condition, which takes the form
+ * column-ref comparison-op constant-value
.
+ *
+ * @param columnDesc column being compared
+ *
+ * @param comparisonOp comparison operator, e.g. "="
+ * (taken from GenericUDFBridge.getUdfName())
+ *
+ * @param constantDesc constant value to search for
+ *
+ * @Param comparisonExpr the original comparison expression
+ */
+ public IndexSearchCondition(
+ ExprNodeColumnDesc columnDesc,
+ String comparisonOp,
+ ExprNodeConstantDesc constantDesc,
+ ExprNodeDesc comparisonExpr) {
+
+ this.columnDesc = columnDesc;
+ this.comparisonOp = comparisonOp;
+ this.constantDesc = constantDesc;
+ this.comparisonExpr = comparisonExpr;
+ }
+
+ public void setColumnDesc(ExprNodeColumnDesc columnDesc) {
+ this.columnDesc = columnDesc;
+ }
+
+ public ExprNodeColumnDesc getColumnDesc() {
+ return columnDesc;
+ }
+
+ public void setComparisonOp(String comparisonOp) {
+ this.comparisonOp = comparisonOp;
+ }
+
+ public String getComparisonOp() {
+ return comparisonOp;
+ }
+
+ public void setConstantDesc(ExprNodeConstantDesc constantDesc) {
+ this.constantDesc = constantDesc;
+ }
+
+ public ExprNodeConstantDesc getConstantDesc() {
+ return constantDesc;
+ }
+
+ public void setComparisonExpr(ExprNodeDesc comparisonExpr) {
+ this.comparisonExpr = comparisonExpr;
+ }
+
+ public ExprNodeDesc getComparisonExpr() {
+ return comparisonExpr;
+ }
+
+ @Override
+ public String toString() {
+ return comparisonExpr.getExprString();
+ }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java (revision 0)
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.index;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+
+/**
+ * IndexPredicateAnalyzer decomposes predicates, separating the parts
+ * which can be satisfied by an index from the parts which cannot.
+ * Currently, it only supports pure conjunctions over binary expressions
+ * comparing a column reference with a constant value. It is assumed
+ * that all column aliases encountered refer to the same table.
+ */
+public class IndexPredicateAnalyzer
+{
+ private Set udfNames;
+
+ private Set allowedColumnNames;
+
+ public IndexPredicateAnalyzer() {
+ udfNames = new HashSet();
+ }
+
+ /**
+ * Registers a comparison operator as one which can be satisfied
+ * by an index search. Unless this is called, analyzePredicate
+ * will never find any indexable conditions.
+ *
+ * @param udfName name of comparison operator as returned
+ * by either {@link GenericUDFBridge#getUdfName} (for simple UDF's)
+ * or udf.getClass().getName() (for generic UDF's).
+ */
+ public void addComparisonOp(String udfName) {
+ udfNames.add(udfName);
+ }
+
+ /**
+ * Clears the set of column names allowed in comparisons. (Initially, all
+ * column names are allowed.)
+ */
+ public void clearAllowedColumnNames() {
+ allowedColumnNames = new HashSet();
+ }
+
+ /**
+ * Adds a column name to the set of column names allowed.
+ *
+ * @param columnName name of column to be allowed
+ */
+ public void allowColumnName(String columnName) {
+ if (allowedColumnNames == null) {
+ clearAllowedColumnNames();
+ }
+ allowedColumnNames.add(columnName);
+ }
+
+ /**
+ * Analyzes a predicate.
+ *
+ * @param predicate predicate to be analyzed
+ *
+ * @param searchConditions receives conditions produced by analysis
+ *
+ * @return residual predicate which could not be translated to
+ * searchConditions
+ */
+ public ExprNodeDesc analyzePredicate(
+ ExprNodeDesc predicate,
+ final List searchConditions) {
+
+ Map opRules = new LinkedHashMap();
+ NodeProcessor nodeProcessor = new NodeProcessor() {
+ @Override
+ public Object process(Node nd, Stack stack,
+ NodeProcessorCtx procCtx, Object... nodeOutputs)
+ throws SemanticException {
+
+ // We can only push down stuff which appears as part of
+ // a pure conjunction: reject OR, CASE, etc.
+ for (Node ancestor : stack) {
+ if (nd == ancestor) {
+ break;
+ }
+ if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) {
+ return nd;
+ }
+ }
+
+ return analyzeExpr((ExprNodeDesc) nd, searchConditions, nodeOutputs);
+ }
+ };
+
+ Dispatcher disp = new DefaultRuleDispatcher(
+ nodeProcessor, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+ ArrayList topNodes = new ArrayList();
+ topNodes.add(predicate);
+ HashMap nodeOutput = new HashMap();
+ try {
+ ogw.startWalking(topNodes, nodeOutput);
+ } catch (SemanticException ex) {
+ throw new RuntimeException(ex);
+ }
+ ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate);
+ return residualPredicate;
+ }
+
+ private ExprNodeDesc analyzeExpr(
+ ExprNodeDesc expr,
+ List searchConditions,
+ Object... nodeOutputs) {
+
+ if (!(expr instanceof ExprNodeGenericFuncDesc)) {
+ return expr;
+ }
+ if (FunctionRegistry.isOpAnd(expr)) {
+ assert(nodeOutputs.length == 2);
+ ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
+ ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
+ if (residual1 == null) {
+ return residual2;
+ }
+ if (residual2 == null) {
+ return residual1;
+ }
+ List residuals = new ArrayList();
+ residuals.add(residual1);
+ residuals.add(residual2);
+ return new ExprNodeGenericFuncDesc(
+ TypeInfoFactory.booleanTypeInfo,
+ FunctionRegistry.getGenericUDFForAnd(),
+ residuals);
+ }
+
+ String udfName;
+ ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr;
+ if (funcDesc.getGenericUDF() instanceof GenericUDFBridge) {
+ GenericUDFBridge func = (GenericUDFBridge) funcDesc.getGenericUDF();
+ udfName = func.getUdfName();
+ } else {
+ udfName = funcDesc.getGenericUDF().getClass().getName();
+ }
+ if (!udfNames.contains(udfName)) {
+ return expr;
+ }
+
+ ExprNodeDesc child1 = (ExprNodeDesc) nodeOutputs[0];
+ ExprNodeDesc child2 = (ExprNodeDesc) nodeOutputs[1];
+ ExprNodeColumnDesc columnDesc = null;
+ ExprNodeConstantDesc constantDesc = null;
+ if ((child1 instanceof ExprNodeColumnDesc)
+ && (child2 instanceof ExprNodeConstantDesc)) {
+ // COL CONSTANT
+ columnDesc = (ExprNodeColumnDesc) child1;
+ constantDesc = (ExprNodeConstantDesc) child2;
+ } else if ((child2 instanceof ExprNodeColumnDesc)
+ && (child1 instanceof ExprNodeConstantDesc)) {
+ // CONSTANT COL
+ columnDesc = (ExprNodeColumnDesc) child2;
+ constantDesc = (ExprNodeConstantDesc) child1;
+ }
+ if (columnDesc == null) {
+ return expr;
+ }
+ if (allowedColumnNames != null) {
+ if (!allowedColumnNames.contains(columnDesc.getColumn())) {
+ return expr;
+ }
+ }
+ searchConditions.add(
+ new IndexSearchCondition(
+ columnDesc,
+ udfName,
+ constantDesc,
+ expr));
+
+ // we converted the expression to a search condition, so
+ // remove it from the residual predicate
+ return null;
+ }
+
+ /**
+ * Translates search conditions back to ExprNodeDesc form (as
+ * a left-deep conjunction).
+ *
+ * @param searchConditions (typically produced by analyzePredicate)
+ *
+ * @return ExprNodeDesc form of search conditions
+ */
+ public ExprNodeDesc translateSearchConditions(
+ List searchConditions) {
+
+ ExprNodeDesc expr = null;
+ for (IndexSearchCondition searchCondition : searchConditions) {
+ if (expr == null) {
+ expr = searchCondition.getComparisonExpr();
+ continue;
+ }
+ List children = new ArrayList();
+ children.add(expr);
+ children.add(searchCondition.getComparisonExpr());
+ expr = new ExprNodeGenericFuncDesc(
+ TypeInfoFactory.booleanTypeInfo,
+ FunctionRegistry.getGenericUDFForAnd(),
+ children);
+ }
+ return expr;
+ }
+}