diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
index 928afe6..31b9981 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
@@ -4,221 +4,263 @@
 import java.util.BitSet;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.eigenbase.rel.RelNode;
 import org.eigenbase.rel.TableAccessRel;
 import org.eigenbase.relopt.RelOptAbstractTable;
 import org.eigenbase.relopt.RelOptSchema;
 import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.rex.RexNode;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableMap.Builder;
 
-/*
- * Fix Me:
- * 1. Column Pruning
- * 2. Partition Pruning
- * 3. Stats
- */
-
 public class RelOptHiveTable extends RelOptAbstractTable {
-  private final Table m_hiveTblMetadata;
-  private double m_rowCount = -1;
-  private final ImmutableList<ColumnInfo> m_hiveNonPartitionCols;
-  private final ImmutableMap<Integer, ColumnInfo> m_hiveNonPartitionColsMap;
-  private final ImmutableMap<Integer, ColumnInfo> m_hivePartitionColsMap;
-  Map<Integer, ColStatistics> m_hiveColStatsMap = new HashMap<Integer, ColStatistics>();
-  private Integer m_numPartitions;
-  private final int m_noOfProjs;
-
-  protected static final Log LOG = LogFactory.getLog(RelOptHiveTable.class
-      .getName());
-
-  public RelOptHiveTable(RelOptSchema optiqSchema, String name,
-      RelDataType rowType, Table hiveTblMetadata,
-      List<ColumnInfo> hiveNonPartitionCols,
-      List<ColumnInfo> hivePartitionCols) {
-    super(optiqSchema, name, rowType);
-    m_hiveTblMetadata = hiveTblMetadata;
-    m_hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols);
-    m_hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0);
-    m_hivePartitionColsMap = getColInfoMap(hivePartitionCols,
-        m_hiveNonPartitionColsMap.size());
-    m_noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size();
-  }
-
-  private static ImmutableMap<Integer, ColumnInfo> getColInfoMap(
-      List<ColumnInfo> hiveCols, int startIndx) {
-    Builder<Integer, ColumnInfo> bldr = ImmutableMap
-        .<Integer, ColumnInfo> builder();
-
-    int indx = startIndx;
-    for (ColumnInfo ci : hiveCols) {
-      bldr.put(indx, ci);
-      indx++;
-    }
-
-    return bldr.build();
-  }
-
-  @Override
-  public boolean isKey(BitSet arg0) {
-    return false;
-  }
-
-  @Override
-  public RelNode toRel(ToRelContext context) {
-    return new TableAccessRel(context.getCluster(), this);
-  }
-
-  @Override
-  public <T> T unwrap(Class<T> arg0) {
-    return arg0.isInstance(this) ?
arg0.cast(this) : null; - } - - @Override - public double getRowCount() { - if (m_rowCount == -1) - m_rowCount = StatsUtils.getNumRows(m_hiveTblMetadata); - - return m_rowCount; - } - - public Table getHiveTableMD() { - return m_hiveTblMetadata; - } - - private String getColNamesForLogging(Set colLst) { - StringBuffer sb = new StringBuffer(); - boolean firstEntry = true; - for (String colName : colLst) { - if (firstEntry) { - sb.append(colName); - firstEntry = false; - } else { - sb.append(", " + colName); - } - } - return sb.toString(); - } - - private void updateColStats(Set projIndxLst) { - List nonPartColNamesThatRqrStats = new ArrayList(); - List nonPartColIndxsThatRqrStats = new ArrayList(); - List partColNamesThatRqrStats = new ArrayList(); - List partColIndxsThatRqrStats = new ArrayList(); - Set colNamesFailedStats = new HashSet(); - - // 1. Separate required columns to Non Partition and Partition Cols - ColumnInfo tmp; - for (Integer pi : projIndxLst) { - if (m_hiveColStatsMap.get(pi) == null) { - if ((tmp = m_hiveNonPartitionColsMap.get(pi)) != null) { - nonPartColNamesThatRqrStats.add(tmp.getInternalName()); - nonPartColIndxsThatRqrStats.add(pi); - } else if ((tmp = m_hivePartitionColsMap.get(pi)) != null) { - partColNamesThatRqrStats.add(tmp.getInternalName()); - partColIndxsThatRqrStats.add(pi); - } else { - String logMsg = "Unable to find Column Index: " + pi - + ", in " + m_hiveTblMetadata.getCompleteName(); - LOG.error(logMsg); - throw new RuntimeException(logMsg); - } - } - } - - // 2. Obtain Col Stats for Non Partition Cols - if (nonPartColNamesThatRqrStats.size() > 0) { - List colStats = StatsUtils.getTableColumnStats( - m_hiveTblMetadata, m_hiveNonPartitionCols, - nonPartColNamesThatRqrStats); - if (colStats != null - && colStats.size() == nonPartColNamesThatRqrStats.size()) { - for (int i = 0; i < colStats.size(); i++) { - m_hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), - colStats.get(i)); - } - } else { - // TODO: colNamesFailedStats is designed to be used for both non - // partitioned & partitioned cols; currently only used for non - // partitioned cols. - colNamesFailedStats.addAll(nonPartColNamesThatRqrStats); - } - } - - // 3. Obtain Stats for Partition Cols - // TODO: Fix this as part of Partition Pruning - if (!partColNamesThatRqrStats.isEmpty()) { - if (m_numPartitions == null) { - try { - m_numPartitions = Hive - .get() - .getPartitionNames(m_hiveTblMetadata.getDbName(), - m_hiveTblMetadata.getTableName(), - (short) -1).size(); - } catch (HiveException e) { - String logMsg = "Could not get stats, number of Partitions for " - + m_hiveTblMetadata.getCompleteName(); - LOG.error(logMsg); - throw new RuntimeException(logMsg); - } - } - - ColStatistics cStats = null; - for (int i = 0; i < partColNamesThatRqrStats.size(); i++) { - cStats = new ColStatistics(m_hiveTblMetadata.getTableName(), - partColNamesThatRqrStats.get(i), m_hivePartitionColsMap - .get(partColIndxsThatRqrStats.get(i)) - .getTypeName()); - cStats.setCountDistint(m_numPartitions); - - m_hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats); - } - } - - // 4. 
Warn user if we could get stats for required columns - if (!colNamesFailedStats.isEmpty()) { - String logMsg = "No Stats for " - + m_hiveTblMetadata.getCompleteName() + ", Columns: " - + getColNamesForLogging(colNamesFailedStats); - LOG.error(logMsg); - throw new RuntimeException(logMsg); - } - } - - public List getColStat(List projIndxLst) { - List hiveColStatLst = new LinkedList(); - - if (projIndxLst != null) { - updateColStats(new HashSet(projIndxLst)); - for (Integer i : projIndxLst) { - hiveColStatLst.add(m_hiveColStatsMap.get(i)); - } - } else { - List pILst = new ArrayList(); - for (Integer i = 0; i < m_noOfProjs; i++) { - pILst.add(i); - } - updateColStats(new HashSet(pILst)); - for (Integer pi : pILst) { - hiveColStatLst.add(m_hiveColStatsMap.get(pi)); - } - } - - return hiveColStatLst; - } + private final Table m_hiveTblMetadata; + private final ImmutableList m_hiveNonPartitionCols; + private final ImmutableMap m_hiveNonPartitionColsMap; + private final ImmutableMap m_hivePartitionColsMap; + private final int m_noOfProjs; + final HiveConf m_hiveConf; + + private double m_rowCount = -1; + Map m_hiveColStatsMap = new HashMap(); + private Integer m_numPartitions; + PrunedPartitionList partitionList; + + protected static final Log LOG = LogFactory + .getLog(RelOptHiveTable.class + .getName()); + + public RelOptHiveTable(RelOptSchema optiqSchema, String name, RelDataType rowType, + Table hiveTblMetadata, List hiveNonPartitionCols, + List hivePartitionCols, HiveConf hconf) { + super(optiqSchema, name, rowType); + m_hiveTblMetadata = hiveTblMetadata; + m_hiveNonPartitionCols = ImmutableList.copyOf(hiveNonPartitionCols); + m_hiveNonPartitionColsMap = getColInfoMap(hiveNonPartitionCols, 0); + m_hivePartitionColsMap = getColInfoMap(hivePartitionCols, m_hiveNonPartitionColsMap.size()); + m_noOfProjs = hiveNonPartitionCols.size() + hivePartitionCols.size(); + m_hiveConf = hconf; + } + + private static ImmutableMap getColInfoMap(List hiveCols, + int startIndx) { + Builder bldr = ImmutableMap. builder(); + + int indx = startIndx; + for (ColumnInfo ci : hiveCols) { + bldr.put(indx, ci); + indx++; + } + + return bldr.build(); + } + + @Override + public boolean isKey(BitSet arg0) { + return false; + } + + @Override + public RelNode toRel(ToRelContext context) { + return new TableAccessRel(context.getCluster(), this); + } + + @Override + public T unwrap(Class arg0) { + return arg0.isInstance(this) ? 
arg0.cast(this) : null; + } + + @Override + public double getRowCount() { + if (m_rowCount == -1) + m_rowCount = StatsUtils.getNumRows(m_hiveTblMetadata); + + return m_rowCount; + } + + public Table getHiveTableMD() { + return m_hiveTblMetadata; + } + + private String getColNamesForLogging(Set colLst) { + StringBuffer sb = new StringBuffer(); + boolean firstEntry = true; + for (String colName : colLst) { + if (firstEntry) { + sb.append(colName); + firstEntry = false; + } else { + sb.append(", " + colName); + } + } + return sb.toString(); + } + + public void computePartitionList(HiveConf conf, RexNode pruneNode) throws HiveException { + partitionList = null; + if (pruneNode == null) { + return; + } + + ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true)); + + partitionList = PartitionPruner.prune(m_hiveTblMetadata, pruneExpr, conf, getName(), + new HashMap()); + } + + private void updateColStats(Set projIndxLst) { + List nonPartColNamesThatRqrStats = new ArrayList(); + List nonPartColIndxsThatRqrStats = new ArrayList(); + List partColNamesThatRqrStats = new ArrayList(); + List partColIndxsThatRqrStats = new ArrayList(); + Set colNamesFailedStats = new HashSet(); + + // 1. Separate required columns to Non Partition and Partition Cols + ColumnInfo tmp; + for (Integer pi : projIndxLst) { + if (m_hiveColStatsMap.get(pi) == null) { + if ((tmp = m_hiveNonPartitionColsMap.get(pi)) != null) { + nonPartColNamesThatRqrStats.add(tmp.getInternalName()); + nonPartColIndxsThatRqrStats.add(pi); + } else if ((tmp = m_hivePartitionColsMap.get(pi)) != null) { + partColNamesThatRqrStats.add(tmp.getInternalName()); + partColIndxsThatRqrStats.add(pi); + } else { + String logMsg = "Unable to find Column Index: " + pi + ", in " + + m_hiveTblMetadata.getCompleteName(); + LOG.error(logMsg); + throw new RuntimeException(logMsg); + } + } + } + + // 2. Obtain Col Stats for Non Partition Cols + if (nonPartColNamesThatRqrStats.size() > 0) { + List hiveColStats; + + // 2.1 Handle the case where we are scanning only a set of partitions + if (partitionList == null) { + hiveColStats = StatsUtils.getTableColumnStats(m_hiveTblMetadata, m_hiveNonPartitionCols, + nonPartColNamesThatRqrStats); + + // 2.1.1 Record Column Names that we needed stats for but couldn't + if (hiveColStats == null) { + colNamesFailedStats.addAll(nonPartColNamesThatRqrStats); + } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) { + Set setOfFiledCols = new HashSet(nonPartColNamesThatRqrStats); + + Set setOfObtainedColStats = new HashSet(); + for (ColStatistics cs : hiveColStats) { + setOfObtainedColStats.add(cs.getColumnName()); + } + setOfFiledCols.removeAll(setOfObtainedColStats); + + colNamesFailedStats.addAll(setOfFiledCols); + } + } else { + // 2.2 Obtain col stats for full table scan + Statistics stats = StatsUtils.collectStatistics(m_hiveConf, partitionList, + m_hiveTblMetadata, m_hiveNonPartitionCols, nonPartColNamesThatRqrStats); + m_rowCount = stats.getNumRows(); + hiveColStats = new ArrayList(); + for (String c : nonPartColNamesThatRqrStats) { + ColStatistics cs = stats.getColumnStatisticsFromColName(c); + if (cs != null) { + hiveColStats.add(cs); + } else { + colNamesFailedStats.add(c); + } + } + } + + if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) { + for (int i = 0; i < hiveColStats.size(); i++) { + m_hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i)); + } + } + } + + // 3. 
Obtain Stats for Partition Cols + // TODO: Just using no of partitions for NDV is a gross approximation for + // multi col partitions; Hack till HIVE-7392 gets fixed. + if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) { + if (m_numPartitions == null) { + try { + if (partitionList != null) { + m_numPartitions = partitionList.getPartitions().size(); + } else { + m_numPartitions = Hive + .get() + .getPartitionNames(m_hiveTblMetadata.getDbName(), m_hiveTblMetadata.getTableName(), + (short) -1).size(); + } + } catch (HiveException e) { + String logMsg = "Could not get stats, number of Partitions for " + + m_hiveTblMetadata.getCompleteName(); + LOG.error(logMsg); + throw new RuntimeException(logMsg); + } + } + + ColStatistics cStats = null; + for (int i = 0; i < partColNamesThatRqrStats.size(); i++) { + cStats = new ColStatistics(m_hiveTblMetadata.getTableName(), + partColNamesThatRqrStats.get(i), m_hivePartitionColsMap.get( + partColIndxsThatRqrStats.get(i)).getTypeName()); + cStats.setCountDistint(m_numPartitions); + + m_hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats); + } + } + + // 4. Warn user if we could get stats for required columns + if (!colNamesFailedStats.isEmpty()) { + String logMsg = "No Stats for " + m_hiveTblMetadata.getCompleteName() + ", Columns: " + + getColNamesForLogging(colNamesFailedStats); + LOG.error(logMsg); + throw new RuntimeException(logMsg); + } + } + + public List getColStat(List projIndxLst) { + ImmutableList.Builder colStatsBldr = ImmutableList. builder(); + + if (projIndxLst != null) { + updateColStats(new HashSet(projIndxLst)); + for (Integer i : projIndxLst) { + colStatsBldr.add(m_hiveColStatsMap.get(i)); + } + } else { + List pILst = new ArrayList(); + for (Integer i = 0; i < m_noOfProjs; i++) { + pILst.add(i); + } + updateColStats(new HashSet(pILst)); + for (Integer pi : pILst) { + colStatsBldr.add(m_hiveColStatsMap.get(pi)); + } + } + + return colStatsBldr.build(); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java new file mode 100644 index 0000000..ea96524 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptRuleCall;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.util.Pair;
+
+public class HivePartitionPrunerRule extends RelOptRule {
+
+  HiveConf conf;
+
+  public HivePartitionPrunerRule(HiveConf conf) {
+    super(operand(HiveFilterRel.class, operand(HiveTableScanRel.class, none())));
+    this.conf = conf;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    HiveFilterRel filter = call.rel(0);
+    HiveTableScanRel tScan = call.rel(1);
+    perform(call, filter, tScan);
+  }
+
+  protected void perform(RelOptRuleCall call, FilterRelBase filter,
+      HiveTableScanRel tScan) {
+
+    RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+    RexNode predicate = filter.getCondition();
+
+    Pair<RexNode, RexNode> predicates = PartitionPruner
+        .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
+    RexNode partColExpr = predicates.left;
+    RexNode remainingExpr = predicates.right;
+    remainingExpr = remainingExpr == null ? filter.getCluster().getRexBuilder()
+        .makeLiteral(true) : remainingExpr;
+
+    if (partColExpr == null || InputFinder.bits(partColExpr).length() == 0) {
+      return;
+    }
+
+    try {
+      hiveTable.computePartitionList(conf, partColExpr);
+    } catch (HiveException he) {
+      throw new RuntimeException(he);
+    }
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
index 4fb5249..0a03b62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
@@ -8,7 +8,6 @@
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
 import org.eigenbase.rel.FilterRelBase;
 import org.eigenbase.rel.JoinRelBase;
-import org.eigenbase.rel.JoinRelType;
 import org.eigenbase.rel.RelNode;
 import org.eigenbase.relopt.RelOptRule;
 import org.eigenbase.relopt.RelOptRuleCall;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java
new file mode 100644
index 0000000..d9d94f6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/PartitionPruner.java
@@ -0,0 +1,197 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexLiteral;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexVisitorImpl;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.util.Pair;
+
+public class PartitionPruner {
+
+  /**
+   * Breaks the predicate into 2 pieces. The first piece is the expressions that
+   * only contain partition columns and can be used for Partition Pruning; the
+   * second piece is the predicates that are left.
+   *
+   * @param cluster
+   * @param hiveTable
+   * @param predicate
+   * @return a Pair of expressions, each of which may be null. The 1st predicate
+   *         is expressions that only contain partition columns; the 2nd
+   *         predicate contains the remaining predicates.
+   */
+  public static Pair<RexNode, RexNode> extractPartitionPredicates(
+      RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
+    RexNode partitionPruningPred = predicate
+        .accept(new ExtractPartPruningPredicate(cluster, hiveTable));
+    RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(
+        cluster, partitionPruningPred));
+    return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
+  }
+
+  public static class ExtractPartPruningPredicate extends
+      RexVisitorImpl<RexNode> {
+
+    final RelOptHiveTable hiveTable;
+    final RelDataType rType;
+    final Set<String> partCols;
+    final RelOptCluster cluster;
+
+    public ExtractPartPruningPredicate(RelOptCluster cluster,
+        RelOptHiveTable hiveTable) {
+      super(true);
+      this.hiveTable = hiveTable;
+      rType = hiveTable.getRowType();
+      List<FieldSchema> pfs = hiveTable.getHiveTableMD().getPartCols();
+      partCols = new HashSet<String>();
+      for (FieldSchema pf : pfs) {
+        partCols.add(pf.getName());
+      }
+      this.cluster = cluster;
+    }
+
+    @Override
+    public RexNode visitLiteral(RexLiteral literal) {
+      return literal;
+    }
+
+    @Override
+    public RexNode visitInputRef(RexInputRef inputRef) {
+      RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+      if (partCols.contains(f.getName())) {
+        return inputRef;
+      } else {
+        return null;
+      }
+    }
+
+    @Override
+    public RexNode visitCall(RexCall call) {
+      if (!deep) {
+        return null;
+      }
+
+      List<RexNode> args = new LinkedList<RexNode>();
+      boolean argsPruned = false;
+
+      for (RexNode operand : call.operands) {
+        RexNode n = operand.accept(this);
+        if (n != null) {
+          args.add(n);
+        } else {
+          argsPruned = true;
+        }
+      }
+
+      if (call.getOperator() != SqlStdOperatorTable.AND) {
+        return argsPruned ?
null : call; + } else { + if (args.size() == 0) { + return null; + } else if (args.size() == 1) { + return args.get(0); + } else { + return cluster.getRexBuilder().makeCall(call.getOperator(), args); + } + } + } + + } + + public static class ExtractRemainingPredicate extends RexVisitorImpl { + + List pruningPredicates; + final RelOptCluster cluster; + + public ExtractRemainingPredicate(RelOptCluster cluster, + RexNode partPruningExpr) { + super(true); + this.cluster = cluster; + pruningPredicates = new ArrayList(); + flattenPredicates(partPruningExpr); + } + + private void flattenPredicates(RexNode r) { + if (r instanceof RexCall + && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) { + for (RexNode c : ((RexCall) r).getOperands()) { + flattenPredicates(c); + } + } else { + pruningPredicates.add(r); + } + } + + @Override + public RexNode visitLiteral(RexLiteral literal) { + return literal; + } + + @Override + public RexNode visitInputRef(RexInputRef inputRef) { + return inputRef; + } + + @Override + public RexNode visitCall(RexCall call) { + if (!deep) { + return null; + } + + if (call.getOperator() != SqlStdOperatorTable.AND) { + if (pruningPredicates.contains(call)) { + return null; + } else { + return call; + } + } + + List args = new LinkedList(); + + for (RexNode operand : call.operands) { + RexNode n = operand.accept(this); + if (n != null) { + args.add(n); + } + } + + if (args.size() == 0) { + return null; + } else if (args.size() == 1) { + return args.get(0); + } else { + return cluster.getRexBuilder().makeCall(call.getOperator(), args); + } + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java index cb16836..1feee8e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTConverter.java @@ -273,7 +273,7 @@ public ASTNode visitCall(RexCall call) { SqlOperator op = call.getOperator(); List astNodeLst = new LinkedList(); if (op.kind == SqlKind.CAST) { - HiveToken ht = TypeConverter.convert(call.getType()); + HiveToken ht = TypeConverter.hiveToken(call.getType()); ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text); if (ht.args != null) { for (String castArg : ht.args) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java new file mode 100644 index 0000000..62346ac --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ExprNodeConverter.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.optiq.translator; + +import java.util.LinkedList; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.eigenbase.reltype.RelDataType; +import org.eigenbase.reltype.RelDataTypeField; +import org.eigenbase.rex.RexCall; +import org.eigenbase.rex.RexInputRef; +import org.eigenbase.rex.RexLiteral; +import org.eigenbase.rex.RexNode; +import org.eigenbase.rex.RexVisitorImpl; + +/* + * convert a RexNode to an ExprNodeDesc + */ +public class ExprNodeConverter extends RexVisitorImpl { + + RelDataType rType; + String tabAlias; + boolean partitioningExpr; + + public ExprNodeConverter(String tabAlias, RelDataType rType, + boolean partitioningExpr) { + super(true); + /* + * hb: 6/25/14 for now we only support expressions that only contain + * partition cols. there is no use case for supporting generic expressions. + * for supporting generic exprs., we need to give the converter information + * on whether a column is a partition column or not, whether a column is a + * virtual column or not. + */ + assert partitioningExpr == true; + this.tabAlias = tabAlias; + this.rType = rType; + this.partitioningExpr = partitioningExpr; + } + + @Override + public ExprNodeDesc visitInputRef(RexInputRef inputRef) { + RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); + return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), + f.getName(), tabAlias, partitioningExpr); + } + + @Override + public ExprNodeDesc visitCall(RexCall call) { + if (!deep) { + return null; + } + + List args = new LinkedList(); + + for (RexNode operand : call.operands) { + args.add(operand.accept(this)); + } + + return new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), + SqlFunctionConverter.getHiveUDF(call.getOperator()), args); + } + + @Override + public ExprNodeDesc visitLiteral(RexLiteral literal) { + RelDataType lType = literal.getType(); + + switch (literal.getTypeName()) { + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, + literal.getValue3()); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, + literal.getValue3()); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, + literal.getValue3()); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, + literal.getValue3()); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, + literal.getValue3()); + case FLOAT: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, + literal.getValue3()); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, + literal.getValue3()); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + literal.getValue3()); + case TIMESTAMP: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, + literal.getValue3()); + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, + literal.getValue3()); + case DECIMAL: + return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo( + lType.getPrecision(), lType.getScale()), literal.getValue3()); + case VARCHAR: 
+ return new ExprNodeConstantDesc(TypeInfoFactory.getVarcharTypeInfo(lType + .getPrecision()), + new HiveVarchar((String)literal.getValue3(), lType.getPrecision())); + case CHAR: + return new ExprNodeConstantDesc(TypeInfoFactory.getCharTypeInfo(lType + .getPrecision()), + new HiveChar((String)literal.getValue3(), lType.getPrecision())); + case OTHER: + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, + literal.getValue3()); + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java index a731f0b..461ca09 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java @@ -626,7 +626,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } RelDataType rowType = TypeConverter.getType(ctx.cluster, rr, neededCols); RelOptHiveTable optTable = new RelOptHiveTable(ctx.schema, tableScanOp.getConf().getAlias(), - rowType, ctx.sA.getTable(tableScanOp), null, null); + rowType, ctx.sA.getTable(tableScanOp), null, null, null); TableAccessRelBase tableRel = new HiveTableScanRel(ctx.cluster, ctx.cluster.traitSetOf(HiveRel.CONVENTION), optTable, rowType); ctx.buildColumnMap(tableScanOp, tableRel); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java index 15ebdc7..01646e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java @@ -28,16 +28,20 @@ import org.eigenbase.util.Util; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; public class SqlFunctionConverter { static final Map hiveToOptiq; static final Map optiqToHiveToken; + static final Map reverseOperatorMap; + static { Builder builder = new Builder(); hiveToOptiq = builder.hiveToOptiq; optiqToHiveToken = builder.optiqToHiveToken; + reverseOperatorMap = ImmutableMap.copyOf(builder.reverseOperatorMap); } public static SqlOperator getOptiqOperator(GenericUDF hiveUDF, @@ -45,6 +49,12 @@ public static SqlOperator getOptiqOperator(GenericUDF hiveUDF, return getOptiqFn(getName(hiveUDF), optiqArgTypes, retType); } + public static GenericUDF getHiveUDF(SqlOperator op) { + String name = reverseOperatorMap.get(op); + FunctionInfo hFn = name != null ? FunctionRegistry.getFunctionInfo(name) : null; + return hFn == null ? null : hFn.getGenericUDF(); + } + // TODO: 1) handle Agg Func Name translation 2) is it correct to add func args // as child of func? 
public static ASTNode buildAST(SqlOperator op, List children) { @@ -93,7 +103,7 @@ private static String getName(GenericUDF hiveUDF) { private static class Builder { final Map hiveToOptiq = Maps.newHashMap(); final Map optiqToHiveToken = Maps.newHashMap(); - + final Map reverseOperatorMap = Maps.newHashMap(); Builder() { registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+")); registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-")); @@ -113,6 +123,7 @@ private static String getName(GenericUDF hiveUDF) { } private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) { + reverseOperatorMap.put(optiqFn, name); FunctionInfo hFn = FunctionRegistry.getFunctionInfo(name); if (hFn != null) { String hFnName = getName(hFn.getGenericUDF()); @@ -142,10 +153,12 @@ public OptiqUDAF(String opName, SqlReturnTypeInference returnTypeInference, m_retType = retType; } + @Override public List getParameterTypes(final RelDataTypeFactory typeFactory) { return m_argTypes; } + @Override public RelDataType getReturnType(final RelDataTypeFactory typeFactory) { return m_retType; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java index 9671c14..5ee7b16 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java @@ -5,14 +5,10 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter.HiveToken; import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; @@ -20,11 +16,12 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.eigenbase.relopt.RelOptCluster; import org.eigenbase.reltype.RelDataType; import org.eigenbase.reltype.RelDataTypeFactory; +import org.eigenbase.reltype.RelDataTypeField; import org.eigenbase.rex.RexBuilder; import org.eigenbase.sql.type.SqlTypeName; @@ -200,9 +197,83 @@ public static RelDataType convert(UnionTypeInfo unionType, // @todo what do we about unions? 
throw new UnsupportedOperationException(); } + + public static TypeInfo convert(RelDataType rType) { + if ( rType.isStruct() ) { + return convertStructType(rType); + } else if ( rType.getComponentType() != null ) { + return convertListType(rType); + } else if ( rType.getKeyType() != null ) { + return convertMapType(rType); + } else { + return convertPrimtiveType(rType); + } + } + + public static TypeInfo convertStructType(RelDataType rType) { + List fTypes = Lists.transform( + rType.getFieldList(), + new Function() { + public TypeInfo apply(RelDataTypeField f) { + return convert(f.getType()); + } + }); + List fNames = Lists.transform( + rType.getFieldList(), + new Function() { + public String apply(RelDataTypeField f) { + return f.getName(); + } + }); + return TypeInfoFactory.getStructTypeInfo(fNames, fTypes); + } + + public static TypeInfo convertMapType(RelDataType rType) { + return TypeInfoFactory.getMapTypeInfo(convert(rType.getKeyType()), + convert(rType.getValueType())); + } + + public static TypeInfo convertListType(RelDataType rType) { + return TypeInfoFactory.getListTypeInfo(convert(rType.getComponentType())); + } + + public static TypeInfo convertPrimtiveType(RelDataType rType) { + switch(rType.getSqlTypeName()) { + case BOOLEAN: + return TypeInfoFactory.booleanTypeInfo; + case TINYINT: + return TypeInfoFactory.byteTypeInfo; + case SMALLINT: + return TypeInfoFactory.shortTypeInfo; + case INTEGER: + return TypeInfoFactory.intTypeInfo; + case BIGINT: + return TypeInfoFactory.longTypeInfo; + case FLOAT: + return TypeInfoFactory.floatTypeInfo; + case DOUBLE: + return TypeInfoFactory.doubleTypeInfo; + case DATE: + return TypeInfoFactory.dateTypeInfo; + case TIMESTAMP: + return TypeInfoFactory.timestampTypeInfo; + case BINARY: + return TypeInfoFactory.binaryTypeInfo; + case DECIMAL: + return TypeInfoFactory.getDecimalTypeInfo(rType.getPrecision(), rType.getScale()); + case VARCHAR: + return TypeInfoFactory.getVarcharTypeInfo(rType.getPrecision()); + case CHAR: + return TypeInfoFactory.getCharTypeInfo(rType.getPrecision()); + case OTHER: + default: + return TypeInfoFactory.voidTypeInfo; + } + + } /*********************** Convert Optiq Types To Hive Types ***********************/ - public static HiveToken convert(RelDataType optiqType) { + public static HiveToken hiveToken(RelDataType optiqType) { HiveToken ht = null; switch (optiqType.getSqlTypeName()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 3c26894..3c1fb0d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** * The transformation step that does partition pruning. @@ -155,7 +156,7 @@ public static PrunedPartitionList prune(TableScanOperator ts, ParseContext parse * pruner condition. 
* @throws HiveException */ - private static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, + public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, HiveConf conf, String alias, Map prunedPartitionsMap) throws HiveException { LOG.trace("Started pruning partiton"); @@ -177,6 +178,17 @@ private static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, prunedPartitionsMap.put(key, ret); return ret; } + + private static ExprNodeDesc removeTruePredciates(ExprNodeDesc e) { + if (e instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc eC = (ExprNodeConstantDesc) e; + if (e.getTypeInfo() == TypeInfoFactory.booleanTypeInfo + && eC.getValue() == Boolean.TRUE) { + return null; + } + } + return e; + } /** * Taking a partition pruning expression, remove the null operands and non-partition columns. @@ -187,7 +199,8 @@ private static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, */ static private ExprNodeDesc compactExpr(ExprNodeDesc expr) { if (expr instanceof ExprNodeConstantDesc) { - if (((ExprNodeConstantDesc)expr).getValue() == null) { + expr = removeTruePredciates(expr); + if (expr == null || ((ExprNodeConstantDesc)expr).getValue() == null) { return null; } else { throw new IllegalStateException("Unexpected non-null ExprNodeConstantDesc: " @@ -198,10 +211,11 @@ static private ExprNodeDesc compactExpr(ExprNodeDesc expr) { boolean isAnd = udf instanceof GenericUDFOPAnd; if (isAnd || udf instanceof GenericUDFOPOr) { List children = expr.getChildren(); - ExprNodeDesc left = children.get(0); - children.set(0, compactExpr(left)); - ExprNodeDesc right = children.get(1); - children.set(1, compactExpr(right)); + ExprNodeDesc left = removeTruePredciates(children.get(0)); + children.set(0, left == null ? null : compactExpr(left)); + ExprNodeDesc right = removeTruePredciates(children.get(1)); + children.set(1, right == null ? null : compactExpr(right)); + // Note that one does not simply compact (not-null or null) to not-null. // Only if we have an "and" is it valid to send one side to metastore. 
if (children.get(0) == null && children.get(1) == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index e2b9ae5..b5ccf80 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -122,6 +122,7 @@ import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HiveMergeProjectRule; +import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule; import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePullUpProjectsAboveJoinRule; import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule; import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushJoinThroughJoinRule; @@ -237,6 +238,7 @@ import org.eigenbase.relopt.RelOptCluster; import org.eigenbase.relopt.RelOptPlanner; import org.eigenbase.relopt.RelOptQuery; +import org.eigenbase.relopt.RelOptRule; import org.eigenbase.relopt.RelOptSchema; import org.eigenbase.relopt.RelTraitSet; import org.eigenbase.relopt.hep.HepPlanner; @@ -247,7 +249,6 @@ import org.eigenbase.rex.RexBuilder; import org.eigenbase.rex.RexInputRef; import org.eigenbase.rex.RexNode; -import org.eigenbase.sql.fun.SqlStdOperatorTable; import org.eigenbase.util.CompositeList; import com.google.common.base.Function; @@ -11860,9 +11861,27 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { + // TODO: Decorelation of subquery should be done before attempting + // Partition Pruning; otherwise Expression evaluation may try to execute + // corelated sub query. + basePlan = hepPlan(basePlan, mdProvider, + HivePushFilterPastJoinRule.FILTER_ON_JOIN, + HivePushFilterPastJoinRule.JOIN, new HivePartitionPrunerRule( + SemanticAnalyzer.this.conf)); + + HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null); + basePlan = fieldTrimmer.trim(basePlan); + + return basePlan; + } + + private RelNode hepPlan(RelNode basePlan, + RelMetadataProvider mdProvider, RelOptRule...rules) { + HepProgramBuilder programBuilder = new HepProgramBuilder(); - programBuilder.addRuleInstance(HivePushFilterPastJoinRule.FILTER_ON_JOIN); - programBuilder.addRuleInstance(HivePushFilterPastJoinRule.JOIN); + for(RelOptRule rule : rules) { + programBuilder.addRuleInstance(rule); + } HepPlanner planner = new HepPlanner(programBuilder.build()); List list = Lists.newArrayList(); @@ -11873,12 +11892,7 @@ public RelNode applyPreCBOTransforms(RelNode basePlan, new CachingRelMetadataProvider(chainedProvider, planner)); planner.setRoot(basePlan); - basePlan = planner.findBestExp(); - - HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null); - basePlan = fieldTrimmer.trim(basePlan); - return basePlan; - + return planner.findBestExp(); } private RelNode genUnionLogicalPlan(String unionalias, String leftalias, @@ -12114,7 +12128,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) { // 4. Build RelOptAbstractTable RelOptHiveTable optTable = new RelOptHiveTable(m_relOptSchema, - tableAlias, rowType, tab, nonPartitionColumns, partitionColumns); + tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf); // 5. 
Build Hive Table Scan Rel
       tableRel = new HiveTableScanRel(m_cluster,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index faa2387..eba6299 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -87,6 +87,7 @@
 
   private static final Log LOG = LogFactory.getLog(StatsUtils.class.getName());
 
+
   /**
    * Collect table, partition and column level statistics
    * @param conf
@@ -103,11 +104,18 @@
   public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
       Table table, TableScanOperator tableScanOperator) {
 
-    Statistics stats = new Statistics();
-
     // column level statistics are required only for the columns that are needed
     List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature();
     List<String> neededColumns = tableScanOperator.getNeededColumns();
+
+    return collectStatistics(conf, partList, table, schema, neededColumns);
+  }
+
+  public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
+      Table table, List<ColumnInfo> schema, List<String> neededColumns) {
+
+    Statistics stats = new Statistics();
+
     boolean fetchColStats =
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
     boolean fetchPartStats =