diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index c60f733..e22a10d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -698,6 +698,15 @@ public static RexNode getTypeSafePred(RelOptCluster cluster, RexNode rex, RelDat
     return typeSafeRex;
   }
 
+  public static boolean isPrimitiveType(RelDataType rType) {
+    // Structs, lists, and maps are not primitive types
+    if (rType.isStruct() || rType.getComponentType() != null ||
+        rType.getKeyType() != null) {
+      return false;
+    }
+    return true;
+  }
+
   public static boolean isDeterministic(RexNode expr) {
     boolean deterministic = true;
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 73ca9bf..9adae5e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -202,7 +202,7 @@ public double getRowCount() {
     if (null == partitionList) {
       // we are here either unpartitioned table or partitioned table with no
       // predicates
-      computePartitionList(hiveConf, null, new HashSet<Integer>());
+      computePartitionList(hiveConf, null, null, new HashSet<Integer>());
     }
     if (hiveTblMetadata.isPartitioned()) {
       List<Long> rowCounts = StatsUtils.getBasicStatForPartitions(hiveTblMetadata,
@@ -238,13 +238,14 @@ private String getColNamesForLogging(Set<String> colLst) {
     return sb.toString();
   }
 
-  public void computePartitionList(HiveConf conf, RexNode pruneNode, Set<Integer> partOrVirtualCols) {
+  public void computePartitionList(HiveConf conf, String tableAlias, RexNode pruneNode,
+      Set<Integer> partOrVirtualCols) {
     try {
       if (!hiveTblMetadata.isPartitioned() || pruneNode == null
           || InputFinder.bits(pruneNode).length() == 0) {
         // there is no predicate on partitioning column, we need all partitions
         // in this case.
-        partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(),
+        partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, tableAlias,
             partitionCache);
         return;
       }
@@ -252,14 +253,17 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode, Set<Integer>
       // We have valid pruning expressions, only retrieve qualifying partitions
       ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(),
           partOrVirtualCols, this.getRelOptSchema().getTypeFactory()));
-
-      partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(),
+      partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, tableAlias,
           partitionCache);
     } catch (HiveException he) {
       throw new RuntimeException(he);
     }
   }
 
+  public PrunedPartitionList getComputedPartitionList() {
+    return partitionList;
+  }
+
   private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnForMissingStats) {
     List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
     List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
@@ -290,7 +294,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnFor
     if (null == partitionList) {
       // We could be here either because its an unpartitioned table or because
       // there are no pruning predicates on a partitioned table.
-      computePartitionList(hiveConf, null, new HashSet<Integer>());
+      computePartitionList(hiveConf, null, null, new HashSet<Integer>());
     }
 
     // 2. Obtain Col Stats for Non Partition Cols
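Note: a minimal sketch (not part of the patch) of how the revised API above is meant to be driven. A rule first computes the partition list under the scan's table alias, and a later rule reads it back through the new accessor; conf, tScan and partColExpr are assumed to be in scope, as they are in HivePartitionPruneRule further below.

    RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
    // Prune under the scan's alias so the pruner caches the list for that alias
    hiveTable.computePartitionList(conf, tScan.getTableAlias(), partColExpr,
        tScan.getPartOrVirtualCols());
    // A later rule (e.g. HivePartitionConditionRemoval) retrieves the cached list
    PrunedPartitionList partList = hiveTable.getComputedPartitionList();
    if (partList != null) {
      for (Partition p : partList.getPartitions()) {
        // evaluate partition predicates against each surviving partition
      }
    }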
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionConditionRemoval.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionConditionRemoval.java
new file mode 100644
index 0000000..b2ca924
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionConditionRemoval.java
@@ -0,0 +1,538 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexDynamicParam;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexLocalRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexRangeRef;
+import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexVisitor;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartExprEvalUtils;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Rule that performs partition pruning and removal of partition
+ * conditions (if enabled).
+ */
+public class HivePartitionConditionRemoval extends RelOptRule {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HivePartitionConditionRemoval.class);
+
+  public static final HivePartitionConditionRemoval INSTANCE =
+      new HivePartitionConditionRemoval();
+
+  public HivePartitionConditionRemoval() {
+    super(operand(HiveFilter.class,
+        operand(HiveTableScan.class, none())));
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    HiveFilter filter = call.rel(0);
+    HiveTableScan tScan = call.rel(1);
+
+    RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+    PrunedPartitionList prunedPartList = hiveTable.getComputedPartitionList();
+    if (prunedPartList == null) {
+      return;
+    }
+    List<Partition> partitions = new ArrayList<>();
+    for (Partition p : prunedPartList.getPartitions()) {
+      if (!p.getTable().isPartitioned()) {
+        return;
+      }
+    }
+    partitions.addAll(prunedPartList.getPartitions());
+
+    PartitionConditionRemovalVisitor visitor = new PartitionConditionRemovalVisitor(
+        call.builder().getRexBuilder(), filter.getCluster().getTypeFactory(),
+        hiveTable, partitions);
+    RexNodeWrapper wrapper = filter.getCondition().accept(visitor);
+
+    if (wrapper.state == WalkState.TRUE) {
+      call.transformTo(filter.getInput());
+    } else if (wrapper.state != WalkState.FALSE) {
+      call.transformTo(call.builder().push(filter.getInput()).filter(wrapper.outExpr).build());
+    } else {
+      LOG.warn("Filter passes no row");
+      call.transformTo(call.builder().push(filter.getInput()).filter(wrapper.outExpr).build());
+    }
+
+    // New plan is absolutely better than old plan.
+    call.getPlanner().setImportance(filter, 0.0);
+  }
+
+  protected static class PartitionConditionRemovalVisitor implements RexVisitor<RexNodeWrapper> {
+    private final RexBuilder rexBuilder;
+    private final RelDataTypeFactory typeFactory;
+    private final RelOptHiveTable hiveTable;
+    private final List<Partition> partitions;
+
+    PartitionConditionRemovalVisitor(RexBuilder rexBuilder, RelDataTypeFactory typeFactory,
+        RelOptHiveTable hiveTable, List<Partition> partitions) {
+      this.rexBuilder = rexBuilder;
+      this.typeFactory = typeFactory;
+      this.hiveTable = hiveTable;
+      this.partitions = partitions;
+    }
+
+    @Override
+    public RexNodeWrapper visitInputRef(RexInputRef inputRef) {
+      if (hiveTable.getPartColInfoMap().containsKey(inputRef.getIndex())) {
+        return new RexNodeWrapper(WalkState.PART_COL, null, inputRef);
+      }
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, inputRef);
+    }
+
+    @Override
+    public RexNodeWrapper visitLocalRef(RexLocalRef localRef) {
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, localRef);
+    }
+
+    @Override
+    public RexNodeWrapper visitLiteral(RexLiteral literal) {
+      return new RexNodeWrapper(WalkState.CONSTANT, null, literal);
+    }
+
+    @Override
+    public RexNodeWrapper visitOver(RexOver over) {
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, over);
+    }
+
+    @Override
+    public RexNodeWrapper visitCorrelVariable(RexCorrelVariable correlVariable) {
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, correlVariable);
+    }
+
+    @Override
+    public RexNodeWrapper visitCall(RexCall call) {
+      // Visit the children
+      List<RexNodeWrapper> childrenOutputs = new ArrayList<>();
+      for (RexNode operand : call.operands) {
+        childrenOutputs.add(operand.accept(this));
+      }
+
+      switch (call.getKind()) {
+
+      case NOT: {
+        assert childrenOutputs.size() == 1;
+        RexNodeWrapper wrapper = childrenOutputs.get(0);
+        if (wrapper.state == WalkState.TRUE) {
+          RexNode literal = castIfNecessary(rexBuilder, wrapper.outExpr.getType(),
+              rexBuilder.makeLiteral(false));
+          return new RexNodeWrapper(WalkState.FALSE, null, literal);
+        } else if (wrapper.state == WalkState.FALSE) {
+          RexNode literal = castIfNecessary(rexBuilder, wrapper.outExpr.getType(),
+              rexBuilder.makeLiteral(true));
+          return new RexNodeWrapper(WalkState.TRUE, null, literal);
+        } else if (wrapper.state == WalkState.DIVIDED) {
+          Boolean[] results = new Boolean[partitions.size()];
+          for (int i = 0; i < partitions.size(); i++) {
+            results[i] = opNot(wrapper.resultVector[i]);
+          }
+          return new RexNodeWrapper(WalkState.DIVIDED, results,
+              getOutExpr(call, childrenOutputs));
+        }
+        return new RexNodeWrapper(wrapper.state, null,
+            getOutExpr(call, childrenOutputs));
+      }
+
+      case AND: {
+        boolean anyUnknown = false; // Whether any of the node outputs is unknown
+        boolean allDivided = true;  // Whether all of the node outputs are divided
+        List<RexNodeWrapper> newChildrenOutputs =
+            new ArrayList<>(childrenOutputs.size());
+        for (int i = 0; i < childrenOutputs.size(); i++) {
+          RexNodeWrapper c = childrenOutputs.get(i);
+          if (c.state == WalkState.FALSE) {
+            return c;
+          }
+          if (c.state == WalkState.UNKNOWN) {
+            anyUnknown = true;
+          }
+          if (c.state != WalkState.DIVIDED) {
+            allDivided = false;
+          }
+          if (c.state != WalkState.TRUE) {
+            newChildrenOutputs.add(c);
+          }
+        }
+        // If all of them were true, return true
+        if (newChildrenOutputs.size() == 0) {
+          RexNode literal = castIfNecessary(rexBuilder, call.getType(),
+              rexBuilder.makeLiteral(true));
+          return new RexNodeWrapper(WalkState.TRUE, null, literal);
+        }
+        // If we are left with a single child, return the child
+        if (newChildrenOutputs.size() == 1) {
+          return newChildrenOutputs.get(0);
+        }
+        if (anyUnknown) {
+          return new RexNodeWrapper(WalkState.UNKNOWN, null,
+              getOutExpr(call, newChildrenOutputs));
+        }
+        if (allDivided) {
+          Boolean[] results = new Boolean[partitions.size()];
+          for (int i = 0; i < partitions.size(); i++) {
+            Boolean[] andArray = new Boolean[newChildrenOutputs.size()];
+            for (int j = 0; j < newChildrenOutputs.size(); j++) {
+              andArray[j] = newChildrenOutputs.get(j).resultVector[i];
+            }
+            results[i] = opAnd(andArray);
+          }
+          return getResultWrapFromResults(rexBuilder, results, call, newChildrenOutputs);
+        }
+        return new RexNodeWrapper(WalkState.UNKNOWN, null,
+            getOutExpr(call, newChildrenOutputs));
+      }
+
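+      // NOTE: in the DIVIDED state each child carries one Boolean per pruned
+      // partition (null meaning "unknown"); the OR case below mirrors the AND
+      // case above, combining those vectors with the three-valued opOr helper.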
+      case OR: {
+        boolean anyUnknown = false; // Whether any of the node outputs is unknown
+        boolean allDivided = true;  // Whether all of the node outputs are divided
+        List<RexNodeWrapper> newChildrenOutputs =
+            new ArrayList<>(childrenOutputs.size());
+        for (int i = 0; i < childrenOutputs.size(); i++) {
+          RexNodeWrapper c = childrenOutputs.get(i);
+          if (c.state == WalkState.TRUE) {
+            return c;
+          }
+          if (c.state == WalkState.UNKNOWN) {
+            anyUnknown = true;
+          }
+          if (c.state != WalkState.DIVIDED) {
+            allDivided = false;
+          }
+          if (c.state != WalkState.FALSE) {
+            newChildrenOutputs.add(c);
+          }
+        }
+        // If all of them were false, return false
+        if (newChildrenOutputs.size() == 0) {
+          RexNode literal = castIfNecessary(rexBuilder, call.getType(),
+              rexBuilder.makeLiteral(false));
+          return new RexNodeWrapper(WalkState.FALSE, null, literal);
+        }
+        // If we are left with a single child, return the child
+        if (newChildrenOutputs.size() == 1) {
+          return newChildrenOutputs.get(0);
+        }
+        if (anyUnknown) {
+          return new RexNodeWrapper(WalkState.UNKNOWN, null,
+              getOutExpr(call, newChildrenOutputs));
+        }
+        if (allDivided) {
+          Boolean[] results = new Boolean[partitions.size()];
+          for (int i = 0; i < partitions.size(); i++) {
+            Boolean[] orArray = new Boolean[newChildrenOutputs.size()];
+            for (int j = 0; j < newChildrenOutputs.size(); j++) {
+              orArray[j] = newChildrenOutputs.get(j).resultVector[i];
+            }
+            results[i] = opOr(orArray);
+          }
+          return getResultWrapFromResults(rexBuilder, results, call, newChildrenOutputs);
+        }
+        return new RexNodeWrapper(WalkState.UNKNOWN, null,
+            getOutExpr(call, newChildrenOutputs));
+      }
+
+      default: {
+        if (!HiveCalciteUtil.isDeterministic(call)) {
+          // Non-deterministic function: we cannot reason about it, mark it UNKNOWN
+          return new RexNodeWrapper(WalkState.UNKNOWN, null,
+              getOutExpr(call, childrenOutputs));
+        } else {
+          // If any child is unknown, the node itself is unknown
+          boolean hasPartitionColumn = false;
+          for (RexNodeWrapper wrapper : childrenOutputs) {
+            if (wrapper.state == WalkState.UNKNOWN) {
+              return new RexNodeWrapper(WalkState.UNKNOWN, null,
+                  getOutExpr(call, childrenOutputs));
+            } else if (wrapper.state == WalkState.PART_COL) {
+              hasPartitionColumn = true;
+            }
+          }
+
+          if (hasPartitionColumn && HiveCalciteUtil.isPrimitiveType(call.getType())) {
+            // we need to evaluate the result for every pruned partition
+            if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) {
+              // if the return type of the function is boolean and all partitions agree
+              // on a result, we update the state of the node to be TRUE or FALSE
+              Boolean[] results = new Boolean[partitions.size()];
+              for (int i = 0; i < partitions.size(); i++) {
+                RexNode evaluateResult = evalExprWithPart(rexBuilder, typeFactory, hiveTable,
+                    call, partitions.get(i));
+                results[i] = !RexLiteral.isNullLiteral(evaluateResult) ?
+                    RexLiteral.booleanValue(evaluateResult) : null;
+              }
+              return getResultWrapFromResults(rexBuilder, results, call, childrenOutputs);
+            }
+
+            // If the return type of the function is not boolean: when all partitions
+            // agree on the result, we replace the node with a CONSTANT node holding
+            // the agreed value; otherwise we mark the node UNKNOWN.
+            RexNode[] results = new RexNode[partitions.size()];
+            for (int i = 0; i < partitions.size(); i++) {
+              results[i] = evalExprWithPart(rexBuilder, typeFactory, hiveTable,
+                  call, partitions.get(i));
+            }
+            RexNode result = ifResultsAgree(results);
+            if (result == null) {
+              // if the result is not boolean and not all partitions agree on the
+              // result, we do not remove the condition. Potentially, this misses
+              // cases like "where ds % 3 == 1 or ds % 3 == 2".
+              // TODO: handle this case by extending the result vector to carry
+              // arbitrary constant values.
+              return new RexNodeWrapper(WalkState.UNKNOWN, null, getOutExpr(call, childrenOutputs));
+            }
+            RexNode literal = castIfNecessary(rexBuilder, call.getType(), result);
+            return new RexNodeWrapper(WalkState.CONSTANT, null, literal);
+          }
+
+          return new RexNodeWrapper(WalkState.CONSTANT, null, getOutExpr(call, childrenOutputs));
+        }
+      }
+      }
+    }
+
+    @Override
+    public RexNodeWrapper visitDynamicParam(RexDynamicParam dynamicParam) {
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, dynamicParam);
+    }
+
+    @Override
+    public RexNodeWrapper visitRangeRef(RexRangeRef rangeRef) {
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, rangeRef);
+    }
+
+    @Override
+    public RexNodeWrapper visitFieldAccess(RexFieldAccess fieldAccess) {
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, fieldAccess);
+    }
+
+    @Override
+    public RexNodeWrapper visitSubQuery(RexSubQuery subQuery) {
+      return new RexNodeWrapper(WalkState.UNKNOWN, null, subQuery);
+    }
+
+    private static RexNode castIfNecessary(
+        RexBuilder rexBuilder,
+        RelDataType type,
+        RexNode exp) {
+      if (!type.equals(exp.getType())) {
+        return rexBuilder.makeCast(type, exp);
+      }
+      return exp;
+    }
+
+    private static RexCall getOutExpr(RexCall call, List<RexNodeWrapper> childrenOutputs) {
+      List<RexNode> children = new ArrayList<>();
+      if (childrenOutputs != null) {
+        for (RexNodeWrapper wrapper : childrenOutputs) {
+          children.add(wrapper.outExpr);
+        }
+      }
+      return call.clone(call.getType(), children);
+    }
+
+    private static RexNode evalExprWithPart(RexBuilder rexBuilder, RelDataTypeFactory typeFactory,
+        RelOptHiveTable hiveTable, RexNode condition, Partition p) {
+      Table tbl = p.getTable();
+
+      StructObjectInspector rowObjectInspector;
+      try {
+        rowObjectInspector = (StructObjectInspector) tbl.getDeserializer().getObjectInspector();
+      } catch (SerDeException e) {
+        throw new RuntimeException(e);
+      }
+
+      // 1) We convert the RexNode condition into an ExprNodeDesc
+      Map<Integer, VirtualColumn> virtualColumnsMap = HiveCalciteUtil.getVColsMap(
+          hiveTable.getVirtualCols(), hiveTable.getNoOfNonVirtualCols());
+      ExprNodeConverter converter = new ExprNodeConverter(tbl.getTableName(),
+          hiveTable.getRowType(), virtualColumnsMap.keySet(), typeFactory);
+      ExprNodeDesc expr = condition.accept(converter);
+
+      // 2) We evaluate the expression with the partition
+      Object result;
+      try {
+        result = PartExprEvalUtils.evalExprWithPart(expr, p, hiveTable.getVirtualCols(),
+            rowObjectInspector);
+      } catch (HiveException e) {
+        throw new RuntimeException(e);
+      }
+
+      // 3) We transform the result into a RexNode (we take nullability into account
+      // to avoid introducing an unnecessary CAST)
+      if (result == null && !condition.getType().isNullable()) {
+        return rexBuilder.makeLiteral(result,
+            typeFactory.createTypeWithNullability(condition.getType(), true), false);
+      }
+      if (result != null && condition.getType().isNullable()) {
+        return rexBuilder.makeLiteral(result,
+            typeFactory.createTypeWithNullability(condition.getType(), false), false);
+      }
+      return rexBuilder.makeLiteral(result, condition.getType(), false);
+    }
+
+    private static Boolean ifResultsAgree(Boolean[] resultVector) {
+      Boolean result = null;
+      for (Boolean b : resultVector) {
+        if (b == null) {
+          return null;
+        } else if (result == null) {
+          result = b;
+        } else if (!result.equals(b)) {
+          return null;
+        }
+      }
+      return result;
+    }
+
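+    // NOTE: RexNode does not define value-based equals(); the RexNode overload
+    // below therefore compares digests via toString(), which matches Calcite's
+    // digest-based notion of expression equivalence.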
+    private static RexNode ifResultsAgree(RexNode[] resultVector) {
+      RexNode result = null;
+      for (RexNode b : resultVector) {
+        if (b == null) {
+          return null;
+        } else if (result == null) {
+          result = b;
+        } else if (!result.toString().equals(b.toString())) {
+          return null;
+        }
+      }
+      return result;
+    }
+
+    private static RexNodeWrapper getResultWrapFromResults(RexBuilder rexBuilder,
+        Boolean[] results, RexCall call, List<RexNodeWrapper> childrenOutputs) {
+      Boolean ifAgree = ifResultsAgree(results);
+      if (ifAgree == null) {
+        return new RexNodeWrapper(WalkState.DIVIDED, results,
+            getOutExpr(call, childrenOutputs));
+      } else if (ifAgree.booleanValue()) {
+        RexNode literal = castIfNecessary(rexBuilder, call.getType(),
+            rexBuilder.makeLiteral(true));
+        return new RexNodeWrapper(WalkState.TRUE, null, literal);
+      } else {
+        RexNode literal = castIfNecessary(rexBuilder, call.getType(),
+            rexBuilder.makeLiteral(false));
+        return new RexNodeWrapper(WalkState.FALSE, null, literal);
+      }
+    }
+
+    private static Boolean opAnd(Boolean... ops) {
+      // When people forget to quote a string, an operand evaluates to null.
+      // For example, select * from some_table where ds > 2012-12-1 and ds < 2012-12-2 .
+      boolean anyNull = false;
+      for (Boolean op : ops) {
+        if (op == null) {
+          anyNull = true;
+          continue;
+        }
+        if (op.equals(Boolean.FALSE)) {
+          return Boolean.FALSE;
+        }
+      }
+      if (anyNull) {
+        return null;
+      }
+      return Boolean.TRUE;
+    }
+
+    private static Boolean opOr(Boolean... ops) {
+      // When people forget to quote a string, an operand evaluates to null.
+      // For example, select * from some_table where ds > 2012-12-1 or ds < 2012-12-2 .
+      boolean anyNull = false;
+      for (Boolean op : ops) {
+        if (op == null) {
+          anyNull = true;
+          continue;
+        }
+        if (op.equals(Boolean.TRUE)) {
+          return Boolean.TRUE;
+        }
+      }
+      if (anyNull) {
+        return null;
+      }
+      return Boolean.FALSE;
+    }
+
+    private static Boolean opNot(Boolean op) {
+      // When people forget to quote a string, the operand evaluates to null.
+      // For example, select * from some_table where not ds > 2012-12-1 .
+      if (op != null) {
+        if (op.equals(Boolean.TRUE)) {
+          return Boolean.FALSE;
+        }
+        if (op.equals(Boolean.FALSE)) {
+          return Boolean.TRUE;
+        }
+      }
+      return null;
+    }
+  }
+
+  private enum WalkState {
+    PART_COL, TRUE, FALSE, CONSTANT, UNKNOWN, DIVIDED
+  }
+
+  private static class RexNodeWrapper {
+
+    private final WalkState state;
+    private final Boolean[] resultVector;
+    private final RexNode outExpr;
+
+    private RexNodeWrapper(WalkState state, Boolean[] resultVector, RexNode outExpr) {
+      this.state = state;
+      this.resultVector = resultVector;
+      this.outExpr = outExpr;
+    }
+  }
+
+}
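For intuition (editor's illustration, not part of the patch): the helpers above implement three-valued logic over the per-partition result vectors, where null stands for "unknown":

    opAnd(Boolean.FALSE, null)  ->  Boolean.FALSE  (false dominates AND)
    opAnd(Boolean.TRUE, null)   ->  null           (unknown)
    opOr(Boolean.TRUE, null)    ->  Boolean.TRUE   (true dominates OR)
    opOr(Boolean.FALSE, null)   ->  null           (unknown)
    opNot(null)                 ->  null

Assuming a table partitioned by ds where the partitions ds='2012-12-01' and ds='2012-12-02' survive pruning, the predicate ds = '2012-12-01' AND key > 10 yields the DIVIDED vector [true, false] for the left conjunct and UNKNOWN for the right, so the whole conjunction stays UNKNOWN and the filter is kept. Once pruning narrows the scan to ds='2012-12-01' alone, the vector collapses to [true], the conjunct becomes TRUE and is dropped, leaving only key > 10 in the filter.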
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java
index d8c3a22..b6f34ae 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java
@@ -52,6 +52,6 @@ protected void perform(RelOptRuleCall call, Filter filter,
     Pair<RexNode, RexNode> predicates = PartitionPrune
         .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
     RexNode partColExpr = predicates.left;
-    hiveTable.computePartitionList(conf, partColExpr, tScan.getPartOrVirtualCols());
+    hiveTable.computePartitionList(conf, tScan.getTableAlias(), partColExpr, tScan.getPartOrVirtualCols());
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index de6a053..ba0f304 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -153,8 +153,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionConditionRemoval;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
@@ -164,10 +163,12 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -1126,7 +1127,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
       // We need a cost model for MR to enable this on MR.
       basePlan = hepPlan(basePlan, true, mdProvider, null, HiveExpandDistinctAggregatesRule.INSTANCE);
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-          "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite");
+          "Calcite: Prejoin ordering transformation - Distinct aggregate rewrite");
     }
 
     // 2. Try factoring out common filter elements & separating deterministic
@@ -1138,7 +1139,8 @@
     basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY,
         HivePreFilteringRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-        "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF");
+        "Calcite: Prejoin ordering transformation - " +
+        "Factor out common filter elements and separating deterministic vs non-deterministic UDF");
 
     // 3. Run exhaustive PPD, add not null filters, transitive inference,
     // constant propagation, constant folding
@@ -1168,13 +1170,27 @@
       rules.add(HiveSortMergeRule.INSTANCE);
       rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE);
       rules.add(HiveUnionPullUpConstantsRule.INSTANCE);
+      rules.add(new HivePartitionPruneRule(conf));
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
       basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
           rules.toArray(new RelOptRule[rules.size()]));
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-          "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
+          "Calcite: Prejoin ordering transformation - " +
+          "PPD, not null predicates, transitive inference, " +
+          "constant folding, partition pruning");
+
+      // 4. Apply partition pruning once more, then partition condition removal
+      // and filter expression reduction over the pruned partitions
+      perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+      basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf));
+      basePlan = hepPlan(basePlan, false, mdProvider, null,
+          HivePartitionConditionRemoval.INSTANCE,
+          HiveReduceExpressionsRule.FILTER_INSTANCE);
+      perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
+          "Calcite: Prejoin ordering transformation - " +
+          "Partition pruning and partition condition removal");
 
-      // 4. Push down limit through outer join
+      // 5. Push down limit through outer join
       // NOTE: We run this after PPD to support old style join syntax.
       // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
       // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10
@@ -1193,21 +1209,17 @@
           new HiveSortRemoveRule(reductionProportion, reductionTuples),
           HiveProjectSortTransposeRule.INSTANCE);
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-          "Calcite: Prejoin ordering transformation, Push down limit through outer join");
+          "Calcite: Prejoin ordering transformation - " +
+          "Push down limit through outer join");
     }
 
-    // 5. Push Down Semi Joins
+    // 6. Push Down Semi Joins
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     basePlan = hepPlan(basePlan, true, mdProvider, null, SemiJoinJoinTransposeRule.INSTANCE,
         SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-        "Calcite: Prejoin ordering transformation, Push Down Semi Joins");
-
-    // 6. Apply Partition Pruning
-    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-    basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf));
-    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-        "Calcite: Prejoin ordering transformation, Partition Pruning");
+        "Calcite: Prejoin ordering transformation - " +
+        "Push Down Semi Joins");
 
     // 7. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP)
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
@@ -1215,14 +1227,16 @@
     HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
         HiveRelFactories.HIVE_BUILDER.create(cluster, null));
     basePlan = fieldTrimmer.trim(basePlan);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-        "Calcite: Prejoin ordering transformation, Projection Pruning");
+        "Calcite: Prejoin ordering transformation - " +
+        "Projection Pruning");
 
     // 8. Merge Project-Project if possible
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     basePlan = hepPlan(basePlan, false, mdProvider, null,
         new ProjectMergeRule(true, HiveRelFactories.HIVE_PROJECT_FACTORY));
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-        "Calcite: Prejoin ordering transformation, Merge Project-Project");
+        "Calcite: Prejoin ordering transformation - " +
+        "Merge Project-Project");
 
     // 9. Rerun PPD through Project as column pruning would have introduced
     // DT above scans; By pushing filter just above TS, Hive can push it into
@@ -1233,7 +1247,8 @@
         Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY,
         HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class));
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-        "Calcite: Prejoin ordering transformation, Rerun PPD");
+        "Calcite: Prejoin ordering transformation - " +
+        "Rerun PPD");
 
     return basePlan;
   }
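Finally, a hedged sketch (editor's addition, not part of the patch) of how the three pieces compose, written directly against Calcite's HepPlanner rather than CalcitePlanner's hepPlan helper, whose metadata-provider and match-order plumbing is omitted here. Pruning must run first so that RelOptHiveTable caches the pruned partition list that HivePartitionConditionRemoval consumes; HiveReduceExpressionsRule.FILTER_INSTANCE then folds away the TRUE/FALSE literals the removal leaves behind.

    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;
    import org.apache.hadoop.hive.conf.HiveConf;

    static RelNode pruneAndRemovePartitionConditions(RelNode basePlan, HiveConf conf) {
      HepProgramBuilder programBuilder = new HepProgramBuilder();
      // 1) compute and cache the pruned partition list on RelOptHiveTable
      programBuilder.addRuleInstance(new HivePartitionPruneRule(conf));
      // 2) rewrite partition predicates to TRUE/FALSE using the cached partitions
      programBuilder.addRuleInstance(HivePartitionConditionRemoval.INSTANCE);
      // 3) fold the resulting constant predicates out of the Filter
      programBuilder.addRuleInstance(HiveReduceExpressionsRule.FILTER_INSTANCE);

      HepPlanner planner = new HepPlanner(programBuilder.build());
      planner.setRoot(basePlan);
      return planner.findBestExp();
    }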