Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java (revision 1611129)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java (working copy)
@@ -11,24 +11,31 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.eigenbase.rel.RelNode;
 import org.eigenbase.rel.TableAccessRel;
 import org.eigenbase.relopt.RelOptAbstractTable;
 import org.eigenbase.relopt.RelOptSchema;
 import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.rex.RexNode;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableMap.Builder;
 
 /*
- * Fix Me: 
+ * Fix Me:
  * 1. Column Pruning
  * 2. Partition Pruning
  * 3. Stats
@@ -43,6 +50,7 @@
   Map<Integer, ColStatistics> m_hiveColStatsMap = new HashMap<Integer, ColStatistics>();
   private Integer m_numPartitions;
   private final int m_noOfProjs;
+  PrunedPartitionList partitionList;
 
   protected static final Log LOG = LogFactory.getLog(RelOptHiveTable.class
       .getName());
@@ -200,6 +208,46 @@
     }
   }
 
+  public void computePartitionList(HiveConf conf, RexNode pruneNode)
+      throws HiveException {
+    partitionList = null;
+    if (pruneNode == null) {
+      return;
+    }
+
+    ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(),
+        getRowType(), true));
+
+    partitionList = PartitionPruner.prune(m_hiveTblMetadata, pruneExpr, conf,
+        getName(), new HashMap<String, PrunedPartitionList>());
+  }
+
+  public void setStatistics(HiveConf conf) {
+    List<String> neededColumns = new ArrayList<String>();
+    List<ColStatistics> m_hiveColStats = new ArrayList<ColStatistics>();
+    for (ColumnInfo ci : m_hiveNonPartitionCols) {
+      neededColumns.add(ci.getInternalName());
+    }
+
+    if (partitionList == null) {
+      // TODO: Fix the two stats calls below
+      m_hiveColStats = StatsUtils.getTableColumnStats(m_hiveTblMetadata,
+          m_hiveNonPartitionCols, neededColumns);
+      m_rowCount = StatsUtils.getNumRows(m_hiveTblMetadata);
+    } else {
+      Statistics stats = StatsUtils.collectStatistics(conf, partitionList,
+          m_hiveTblMetadata, m_hiveNonPartitionCols, neededColumns);
+      m_rowCount = stats.getNumRows();
+      m_hiveColStats = new ArrayList<ColStatistics>();
+      for (String c : neededColumns) {
+        ColStatistics cs = stats.getColumnStatisticsFromColName(c);
+        if (cs != null) {
+          m_hiveColStats.add(cs);
+        }
+      }
+    }
+  }
+
   public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
     List<ColStatistics> hiveColStatLst = new LinkedList<ColStatistics>();
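[Review note] The two new methods are meant to be driven in sequence by the planner rules added later in this patch: prune first, then gather stats over whatever partitions survive. A minimal sketch of the intended call order (table, conf, and pruneRexNode are placeholders; error handling is elided):

    // Sketch only: mirrors what HivePartitionPrunerRule / HiveTableStatsRule do below.
    try {
      // a null pruning expression leaves partitionList null
      table.computePartitionList(conf, pruneRexNode);
    } catch (HiveException he) {
      throw new RuntimeException(he);
    }
    // falls back to table-level stats when partitionList is null
    table.setStatistics(conf);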
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java (revision 1611129)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java (working copy)
@@ -273,7 +273,7 @@
     SqlOperator op = call.getOperator();
     List<ASTNode> astNodeLst = new LinkedList<ASTNode>();
     if (op.kind == SqlKind.CAST) {
-      HiveToken ht = TypeConverter.convert(call.getType());
+      HiveToken ht = TypeConverter.hiveToken(call.getType());
       ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text);
       if (ht.args != null) {
         for (String castArg : ht.args)
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java (revision 1611129)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java (working copy)
@@ -20,11 +20,13 @@
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
 import org.eigenbase.relopt.RelOptCluster;
 import org.eigenbase.reltype.RelDataType;
 import org.eigenbase.reltype.RelDataTypeFactory;
+import org.eigenbase.reltype.RelDataTypeField;
 import org.eigenbase.rex.RexBuilder;
 import org.eigenbase.sql.type.SqlTypeName;
@@ -200,9 +202,83 @@
     // @todo what do we about unions?
     throw new UnsupportedOperationException();
   }
+
+  public static TypeInfo convert(RelDataType rType) {
+    if (rType.isStruct()) {
+      return convertStructType(rType);
+    } else if (rType.getComponentType() != null) {
+      return convertListType(rType);
+    } else if (rType.getKeyType() != null) {
+      return convertMapType(rType);
+    } else {
+      return convertPrimitiveType(rType);
+    }
+  }
+
+  public static TypeInfo convertStructType(RelDataType rType) {
+    List<TypeInfo> fTypes = Lists.transform(
+        rType.getFieldList(),
+        new Function<RelDataTypeField, TypeInfo>() {
+          public TypeInfo apply(RelDataTypeField f) {
+            return convert(f.getType());
+          }
+        });
+    List<String> fNames = Lists.transform(
+        rType.getFieldList(),
+        new Function<RelDataTypeField, String>() {
+          public String apply(RelDataTypeField f) {
+            return f.getName();
+          }
+        });
+    return TypeInfoFactory.getStructTypeInfo(fNames, fTypes);
+  }
+
+  public static TypeInfo convertMapType(RelDataType rType) {
+    return TypeInfoFactory.getMapTypeInfo(convert(rType.getKeyType()),
+        convert(rType.getValueType()));
+  }
+
+  public static TypeInfo convertListType(RelDataType rType) {
+    return TypeInfoFactory.getListTypeInfo(convert(rType.getComponentType()));
+  }
+
+  public static TypeInfo convertPrimitiveType(RelDataType rType) {
+    switch (rType.getSqlTypeName()) {
+    case BOOLEAN:
+      return TypeInfoFactory.booleanTypeInfo;
+    case TINYINT:
+      return TypeInfoFactory.byteTypeInfo;
+    case SMALLINT:
+      return TypeInfoFactory.shortTypeInfo;
+    case INTEGER:
+      return TypeInfoFactory.intTypeInfo;
+    case BIGINT:
+      return TypeInfoFactory.longTypeInfo;
+    case FLOAT:
+      return TypeInfoFactory.floatTypeInfo;
+    case DOUBLE:
+      return TypeInfoFactory.doubleTypeInfo;
+    case DATE:
+      return TypeInfoFactory.dateTypeInfo;
+    case TIMESTAMP:
+      return TypeInfoFactory.timestampTypeInfo;
+    case BINARY:
+      return TypeInfoFactory.binaryTypeInfo;
+    case DECIMAL:
+      return TypeInfoFactory.getDecimalTypeInfo(rType.getPrecision(), rType.getScale());
+    case VARCHAR:
+      return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision());
+    case CHAR:
+      return TypeInfoFactory.getCharTypeInfo(rType.getPrecision());
+    case OTHER:
+    default:
+      return TypeInfoFactory.voidTypeInfo;
+    }
+
+  }
 
   /*********************** Convert Optiq Types To Hive Types ***********************/
-  public static HiveToken convert(RelDataType optiqType) {
+  public static HiveToken hiveToken(RelDataType optiqType) {
     HiveToken ht = null;
 
     switch (optiqType.getSqlTypeName()) {
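[Review note] A quick sketch of the new Optiq-to-Hive direction, assuming a RelDataTypeFactory named typeFactory is in scope. convert() recurses through struct/list/map shells and bottoms out in convertPrimitiveType(), so precision and scale carry across:

    // decimal(10,2) in Optiq comes back as the matching Hive DecimalTypeInfo
    RelDataType decType = typeFactory.createSqlType(SqlTypeName.DECIMAL, 10, 2);
    TypeInfo decTi = TypeConverter.convert(decType);
    // varchar(50) likewise keeps its precision
    RelDataType vcType = typeFactory.createSqlType(SqlTypeName.VARCHAR, 50);
    TypeInfo vcTi = TypeConverter.convert(vcType);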
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java (revision 0)
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexVisitorImpl;
+
+/*
+ * Converts a RexNode to an ExprNodeDesc.
+ */
+public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> {
+
+  RelDataType rType;
+  String tabAlias;
+  boolean partitioningExpr;
+
+  public ExprNodeConverter(String tabAlias, RelDataType rType,
+      boolean partitioningExpr) {
+    super(true);
+    /*
+     * hb: 6/25/14: for now we only support expressions that contain only
+     * partition columns; there is no use case for supporting generic
+     * expressions. To support generic expressions, the converter would need
+     * to be told whether a column is a partition column and whether it is a
+     * virtual column.
+     */
+    assert partitioningExpr == true;
+    this.tabAlias = tabAlias;
+    this.rType = rType;
+    this.partitioningExpr = partitioningExpr;
+  }
+
+  @Override
+  public ExprNodeDesc visitInputRef(RexInputRef inputRef) {
+    RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+    return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()),
+        f.getName(), tabAlias, partitioningExpr);
+  }
+
+  @Override
+  public ExprNodeDesc visitCall(RexCall call) {
+    if (!deep) {
+      return null;
+    }
+
+    List<ExprNodeDesc> args = new LinkedList<ExprNodeDesc>();
+
+    for (RexNode operand : call.operands) {
+      args.add(operand.accept(this));
+    }
+
+    return new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()),
+        SqlFunctionConverter.getHiveUDF(call.getOperator()), args);
+  }
+
+  @Override
+  public ExprNodeDesc visitLiteral(RexLiteral literal) {
+    RelDataType lType = literal.getType();
+
+    switch (literal.getTypeName()) {
+    case BOOLEAN:
+      return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo,
+          literal.getValue3());
+    case TINYINT:
+      return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo,
+          literal.getValue3());
+    case SMALLINT:
+      return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo,
+          literal.getValue3());
+    case INTEGER:
+      return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
+          literal.getValue3());
+    case BIGINT:
+      return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo,
+          literal.getValue3());
+    case FLOAT:
+      return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo,
+          literal.getValue3());
+    case DOUBLE:
+      return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo,
+          literal.getValue3());
+    case DATE:
+      return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo,
+          literal.getValue3());
+    case TIMESTAMP:
+      return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo,
+          literal.getValue3());
+    case BINARY:
+      return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo,
+          literal.getValue3());
+    case DECIMAL:
+      return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(
+          lType.getPrecision(), lType.getScale()), literal.getValue3());
+    case VARCHAR:
+      return new ExprNodeConstantDesc(TypeInfoFactory.getVarcharTypeInfo(lType
+          .getPrecision()),
+          new HiveVarchar((String) literal.getValue3(), lType.getPrecision()));
+    case CHAR:
+      return new ExprNodeConstantDesc(TypeInfoFactory.getCharTypeInfo(lType
+          .getPrecision()),
+          new HiveChar((String) literal.getValue3(), lType.getPrecision()));
+    case OTHER:
+    default:
+      return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo,
+          literal.getValue3());
+    }
+  }
+
+}
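[Review note] This visitor is the bridge used by RelOptHiveTable.computePartitionList() above: a pruning RexNode over the table's row type becomes an ExprNodeDesc that the existing ppr machinery accepts. A sketch, with pruneNode and rowType assumed in scope:

    // partition expressions only, per the assert in the constructor
    ExprNodeDesc pruneExpr = pruneNode.accept(
        new ExprNodeConverter("t1", rowType, true));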
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java (revision 1611129)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java (working copy)
@@ -28,16 +28,20 @@
 import org.eigenbase.util.Util;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
 
 public class SqlFunctionConverter {
   static final Map<String, SqlOperator> hiveToOptiq;
   static final Map<SqlOperator, HiveToken> optiqToHiveToken;
+  static final Map<SqlOperator, String> reverseOperatorMap;
+
   static {
     Builder builder = new Builder();
     hiveToOptiq = builder.hiveToOptiq;
     optiqToHiveToken = builder.optiqToHiveToken;
+    reverseOperatorMap = ImmutableMap.copyOf(builder.reverseOperatorMap);
   }
 
   public static SqlOperator getOptiqOperator(GenericUDF hiveUDF,
@@ -45,6 +49,12 @@
     return getOptiqFn(getName(hiveUDF), optiqArgTypes, retType);
   }
 
+  public static GenericUDF getHiveUDF(SqlOperator op) {
+    String name = reverseOperatorMap.get(op);
+    FunctionInfo hFn = name != null ? FunctionRegistry.getFunctionInfo(name) : null;
+    return hFn == null ? null : hFn.getGenericUDF();
+  }
+
   // TODO: 1) handle Agg Func Name translation 2) is it correct to add func args
   // as child of func?
   public static ASTNode buildAST(SqlOperator op, List<ASTNode> children) {
@@ -93,7 +103,7 @@
   private static class Builder {
     final Map<String, SqlOperator> hiveToOptiq = Maps.newHashMap();
     final Map<SqlOperator, HiveToken> optiqToHiveToken = Maps.newHashMap();
-
+    final Map<SqlOperator, String> reverseOperatorMap = Maps.newHashMap();
     Builder() {
       registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+"));
       registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-"));
@@ -113,6 +123,7 @@
     }
 
     private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) {
+      reverseOperatorMap.put(optiqFn, name);
       FunctionInfo hFn = FunctionRegistry.getFunctionInfo(name);
       if (hFn != null) {
         String hFnName = getName(hFn.getGenericUDF());
@@ -142,10 +153,12 @@
       m_retType = retType;
     }
 
+    @Override
     public List<RelDataType> getParameterTypes(final RelDataTypeFactory typeFactory) {
       return m_argTypes;
     }
 
+    @Override
    public RelDataType getReturnType(final RelDataTypeFactory typeFactory) {
      return m_retType;
    }
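[Review note] The reverse map gives RexCall translation a way back to Hive UDFs. A sketch, assuming "=" is among the operators registered in Builder():

    // returns null when the operator was never registered via registerFunction()
    GenericUDF eq = SqlFunctionConverter.getHiveUDF(SqlStdOperatorTable.EQUALS);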
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java (revision 0)
@@ -0,0 +1,197 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexVisitorImpl;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.util.Pair;
+
+public class PartitionPruner {
+
+  /**
+   * Breaks the predicate into two pieces: the expressions that contain only
+   * partition columns and can be used for partition pruning, and the
+   * predicates that are left over.
+   *
+   * @param cluster
+   * @param hiveTable
+   * @param predicate
+   * @return a Pair of expressions, each of which may be null. The first
+   *         contains the expressions that reference only partition columns;
+   *         the second contains the remaining predicates.
+   */
+  public static Pair<RexNode, RexNode> extractPartitionPredicates(
+      RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
+    RexNode partitionPruningPred = predicate
+        .accept(new ExtractPartPruningPredicate(cluster, hiveTable));
+    RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(
+        cluster, partitionPruningPred));
+    return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
+  }
+
+  public static class ExtractPartPruningPredicate extends
+      RexVisitorImpl<RexNode> {
+
+    final RelOptHiveTable hiveTable;
+    final RelDataType rType;
+    final Set<String> partCols;
+    final RelOptCluster cluster;
+
+    public ExtractPartPruningPredicate(RelOptCluster cluster,
+        RelOptHiveTable hiveTable) {
+      super(true);
+      this.hiveTable = hiveTable;
+      rType = hiveTable.getRowType();
+      List<FieldSchema> pfs = hiveTable.getHiveTableMD().getPartCols();
+      partCols = new HashSet<String>();
+      for (FieldSchema pf : pfs) {
+        partCols.add(pf.getName());
+      }
+      this.cluster = cluster;
+    }
+
+    @Override
+    public RexNode visitLiteral(RexLiteral literal) {
+      return literal;
+    }
+
+    @Override
+    public RexNode visitInputRef(RexInputRef inputRef) {
+      RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+      if (partCols.contains(f.getName())) {
+        return inputRef;
+      } else {
+        return null;
+      }
+    }
+
+    @Override
+    public RexNode visitCall(RexCall call) {
+      if (!deep) {
+        return null;
+      }
+
+      List<RexNode> args = new LinkedList<RexNode>();
+      boolean argsPruned = false;
+
+      for (RexNode operand : call.operands) {
+        RexNode n = operand.accept(this);
+        if (n != null) {
+          args.add(n);
+        } else {
+          argsPruned = true;
+        }
+      }
+
+      if (call.getOperator() != SqlStdOperatorTable.AND) {
+        return argsPruned ? null : call;
+      } else {
+        if (args.size() == 0) {
+          return null;
+        } else if (args.size() == 1) {
+          return args.get(0);
+        } else {
+          return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+        }
+      }
+    }
+
+  }
+
+  public static class ExtractRemainingPredicate extends RexVisitorImpl<RexNode> {
+
+    List<RexNode> pruningPredicates;
+    final RelOptCluster cluster;
+
+    public ExtractRemainingPredicate(RelOptCluster cluster,
+        RexNode partPruningExpr) {
+      super(true);
+      this.cluster = cluster;
+      pruningPredicates = new ArrayList<RexNode>();
+      flattenPredicates(partPruningExpr);
+    }
+
+    private void flattenPredicates(RexNode r) {
+      if (r instanceof RexCall
+          && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) {
+        for (RexNode c : ((RexCall) r).getOperands()) {
+          flattenPredicates(c);
+        }
+      } else {
+        pruningPredicates.add(r);
+      }
+    }
+
+    @Override
+    public RexNode visitLiteral(RexLiteral literal) {
+      return literal;
+    }
+
+    @Override
+    public RexNode visitInputRef(RexInputRef inputRef) {
+      return inputRef;
+    }
+
+    @Override
+    public RexNode visitCall(RexCall call) {
+      if (!deep) {
+        return null;
+      }
+
+      if (call.getOperator() != SqlStdOperatorTable.AND) {
+        if (pruningPredicates.contains(call)) {
+          return null;
+        } else {
+          return call;
+        }
+      }
+
+      List<RexNode> args = new LinkedList<RexNode>();
+
+      for (RexNode operand : call.operands) {
+        RexNode n = operand.accept(this);
+        if (n != null) {
+          args.add(n);
+        }
+      }
+
+      if (args.size() == 0) {
+        return null;
+      } else if (args.size() == 1) {
+        return args.get(0);
+      } else {
+        return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+      }
+    }
+  }
+}
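[Review note] A worked example of the split, assuming ds is a partition column of the table and value is not (cluster, hiveTable, and predicate are placeholders):

    // predicate:  ds = '2014-06-25' AND value > 10
    Pair<RexNode, RexNode> p = PartitionPruner.extractPartitionPredicates(
        cluster, hiveTable, predicate);
    // p.left:   ds = '2014-06-25'   (goes to partition pruning)
    // p.right:  value > 10          (stays in the filter)

Note that for any operator other than AND, a single pruned argument discards the whole call, so ds = '2014-06-25' OR value > 10 yields a null left side and the full predicate on the right.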
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java (revision 0)
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.util.Pair;
+
+public class HivePartitionPrunerRule extends RelOptRule {
+
+  HiveConf conf;
+
+  public HivePartitionPrunerRule(HiveConf conf) {
+    super(operand(HiveFilterRel.class, operand(HiveTableScanRel.class, none())));
+    this.conf = conf;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    HiveFilterRel filter = call.rel(0);
+    HiveTableScanRel tScan = call.rel(1);
+    perform(call, filter, tScan);
+  }
+
+  protected void perform(RelOptRuleCall call, FilterRelBase filter,
+      HiveTableScanRel tScan) {
+
+    RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+    RexNode predicate = filter.getCondition();
+
+    Pair<RexNode, RexNode> predicates = PartitionPruner
+        .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
+    RexNode partColExpr = predicates.left;
+    RexNode remainingExpr = predicates.right;
+    remainingExpr = remainingExpr == null ? filter.getCluster().getRexBuilder()
+        .makeLiteral(true) : remainingExpr;
+
+    if (partColExpr == null || InputFinder.bits(partColExpr).length() == 0) {
+      return;
+    }
+
+    try {
+      hiveTable.computePartitionList(conf, partColExpr);
+    } catch (HiveException he) {
+      throw new RuntimeException(he);
+    }
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveTableStatsRule.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveTableStatsRule.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveTableStatsRule.java (revision 0)
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+
+/*
+ * Gets table-level stats from the Hive metastore.
+ */
+public class HiveTableStatsRule extends RelOptRule {
+
+  HiveConf conf;
+
+  public HiveTableStatsRule(HiveConf conf) {
+    super(operand(HiveTableScanRel.class, none()));
+    this.conf = conf;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    HiveTableScanRel tScan = call.rel(0);
+    perform(call, tScan);
+  }
+
+  protected void perform(RelOptRuleCall call, HiveTableScanRel tScan) {
+    RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+    hiveTable.setStatistics(conf);
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 1611129)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy)
@@ -57,6 +57,7 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 /**
  * The transformation step that does partition pruning.
@@ -155,7 +156,7 @@
    * pruner condition.
    * @throws HiveException
    */
-  private static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
+  public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
       HiveConf conf, String alias, Map<String, PrunedPartitionList> prunedPartitionsMap)
       throws HiveException {
     LOG.trace("Started pruning partiton");
@@ -177,6 +178,17 @@
     prunedPartitionsMap.put(key, ret);
     return ret;
   }
+
+  private static ExprNodeDesc removeTruePredicates(ExprNodeDesc e) {
+    if (e instanceof ExprNodeConstantDesc) {
+      ExprNodeConstantDesc eC = (ExprNodeConstantDesc) e;
+      if (e.getTypeInfo() == TypeInfoFactory.booleanTypeInfo
+          && eC.getValue() == Boolean.TRUE) {
+        return null;
+      }
+    }
+    return e;
+  }
 
   /**
    * Taking a partition pruning expression, remove the null operands and non-partition columns.
@@ -187,7 +199,8 @@
    */
   static private ExprNodeDesc compactExpr(ExprNodeDesc expr) {
     if (expr instanceof ExprNodeConstantDesc) {
-      if (((ExprNodeConstantDesc)expr).getValue() == null) {
+      expr = removeTruePredicates(expr);
+      if (expr == null || ((ExprNodeConstantDesc)expr).getValue() == null) {
         return null;
       } else {
         throw new IllegalStateException("Unexpected non-null ExprNodeConstantDesc: "
@@ -198,10 +211,11 @@
     boolean isAnd = udf instanceof GenericUDFOPAnd;
     if (isAnd || udf instanceof GenericUDFOPOr) {
       List<ExprNodeDesc> children = expr.getChildren();
-      ExprNodeDesc left = children.get(0);
-      children.set(0, compactExpr(left));
-      ExprNodeDesc right = children.get(1);
-      children.set(1, compactExpr(right));
+      ExprNodeDesc left = removeTruePredicates(children.get(0));
+      children.set(0, left == null ? null : compactExpr(left));
+      ExprNodeDesc right = removeTruePredicates(children.get(1));
+      children.set(1, right == null ? null : compactExpr(right));
+
       // Note that one does not simply compact (not-null or null) to not-null.
       // Only if we have an "and" is it valid to send one side to metastore.
       if (children.get(0) == null && children.get(1) == null) {
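[Review note] What the TRUE-removal buys: the CBO path can now hand compactExpr() pruning expressions in which boolean TRUE literals appear (for example, after the partition part of a predicate has been split out) without tripping the "Unexpected non-null ExprNodeConstantDesc" IllegalStateException. Schematically, inside PartitionPruner:

    // with removeTruePredicates() applied first:
    //   (ds = '2014-06-25' AND true)  ->  ds = '2014-06-25'
    //   true                          ->  null (nothing to send to the metastore)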
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1611129)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -122,11 +122,13 @@
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
 import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveMergeProjectRule;
+import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule;
 import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePullUpProjectsAboveJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushJoinThroughJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveRelFieldTrimmer;
 import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveSwapJoinRule;
+import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveTableStatsRule;
 import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter;
 import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter;
@@ -237,6 +239,7 @@
 import org.eigenbase.relopt.RelOptCluster;
 import org.eigenbase.relopt.RelOptPlanner;
 import org.eigenbase.relopt.RelOptQuery;
+import org.eigenbase.relopt.RelOptRule;
 import org.eigenbase.relopt.RelOptSchema;
 import org.eigenbase.relopt.RelTraitSet;
 import org.eigenbase.relopt.hep.HepPlanner;
@@ -11860,9 +11863,27 @@
 
     public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) {
+      basePlan = hepPlan(basePlan, mdProvider,
+          HivePushFilterPastJoinRule.FILTER_ON_JOIN,
+          HivePushFilterPastJoinRule.JOIN, new HivePartitionPrunerRule(
+              SemanticAnalyzer.this.conf));
+
+      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null);
+      basePlan = fieldTrimmer.trim(basePlan);
+
+      basePlan = hepPlan(basePlan, mdProvider, new HiveTableStatsRule(
+          SemanticAnalyzer.this.conf));
+
+      return basePlan;
+    }
+
+    private RelNode hepPlan(RelNode basePlan,
+        RelMetadataProvider mdProvider, RelOptRule... rules) {
+
       HepProgramBuilder programBuilder = new HepProgramBuilder();
-      programBuilder.addRuleInstance(HivePushFilterPastJoinRule.FILTER_ON_JOIN);
-      programBuilder.addRuleInstance(HivePushFilterPastJoinRule.JOIN);
+      for (RelOptRule rule : rules) {
+        programBuilder.addRuleInstance(rule);
+      }
 
       HepPlanner planner = new HepPlanner(programBuilder.build());
       List<RelMetadataProvider> list = Lists.newArrayList();
@@ -11873,12 +11894,7 @@
           new CachingRelMetadataProvider(chainedProvider, planner));
 
       planner.setRoot(basePlan);
-      basePlan = planner.findBestExp();
-
-      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null);
-      basePlan = fieldTrimmer.trim(basePlan);
-      return basePlan;
-
+      return planner.findBestExp();
     }
 
     private RelNode genUnionLogicalPlan(String unionalias, String leftalias,
Index: ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (revision 1611129)
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (working copy)
@@ -87,6 +87,7 @@
 
   private static final Log LOG = LogFactory.getLog(StatsUtils.class.getName());
 
+
   /**
    * Collect table, partition and column level statistics
   * @param conf
   *
@@ -103,11 +104,18 @@
   public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
       Table table, TableScanOperator tableScanOperator) {
 
-    Statistics stats = new Statistics();
-
     // column level statistics are required only for the columns that are needed
     List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature();
     List<String> neededColumns = tableScanOperator.getNeededColumns();
+
+    return collectStatistics(conf, partList, table, schema, neededColumns);
+  }
+
+  public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
+      Table table, List<ColumnInfo> schema, List<String> neededColumns) {
+
+    Statistics stats = new Statistics();
+
     boolean fetchColStats =
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
     boolean fetchPartStats =