diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 148cf7e3d6..c269f0fd5a 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2348,6 +2348,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
     HIVE_OPTIMIZE_CONSTRAINTS_JOIN("hive.optimize.constraints.join", true, "Whether to use referential constraints\n" +
         "to optimize (remove or transform) join operators"),
 
+    HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS("hive.optimize.filter.preds.sort", true, "Whether to sort conditions in filters\n" +
+        "based on estimated selectivity and compute cost"),
+
     HIVE_OPTIMIZE_REDUCE_WITH_STATS("hive.optimize.filter.stats.reduction", false, "Whether to simplify comparison\n" +
         "expressions in filter operators using column stats"),
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
new file mode 100644
index 0000000000..884672d77f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexDynamicParam;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.FilterSelectivityEstimator;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize;
+
+/**
+ * Rule that sorts conditions in a filter predicate to accelerate query processing
+ * based on selectivity and compute cost. Currently it is not applied recursively,
+ * i.e., it is only applied to top predicates in the condition.
+ */
+public class HiveFilterSortPredicates extends RelOptRule {
+
+  public static final HiveFilterSortPredicates INSTANCE = new HiveFilterSortPredicates();
+
+  private HiveFilterSortPredicates() {
+    super(operand(Filter.class, any()));
+  }
+
+  /**
+   * Rewrites the matched filter so that the operands of its top-level AND/OR
+   * condition are ordered by rank. No transformation is registered when the
+   * condition is returned unchanged.
+   */
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Filter filter = call.rel(0);
+    final RexNode originalCond = filter.getCondition();
+    final RexNode newCond = originalCond.accept(
+        new RexSortPredicatesShuttle(
+            filter.getInput(), filter.getCluster().getMetadataQuery()));
+    if (newCond == originalCond) {
+      // The shuttle hands back the same instance when nothing was reordered.
+      return;
+    }
+    call.transformTo(filter.copy(filter.getTraitSet(), filter.getInput(), newCond));
+  }
+
+  /**
+   * Shuttle that reorders the operands of the AND/OR call it is applied to.
+   * Operands are not visited, so nested conditions keep their order.
+   */
+  private static class RexSortPredicatesShuttle extends RexShuttle {
+
+    /** Ascending by rank; operands whose rank is {@code null} are placed last. */
+    private static final Comparator<Pair<RexNode, Double>> BY_RANK =
+        Comparator.<Pair<RexNode, Double>, Double>comparing(
+            Pair::getValue, Comparator.nullsLast(Double::compare));
+
+    private final FilterSelectivityEstimator selectivityEstimator;
+
+    private RexSortPredicatesShuttle(RelNode inputRel, RelMetadataQuery mq) {
+      selectivityEstimator = new FilterSelectivityEstimator(inputRel, mq);
+    }
+
+    @Override
+    public RexNode visitCall(final RexCall call) {
+      switch (call.getKind()) {
+      case AND:
+        return sortOperands(call, this::rankingAnd);
+      case OR:
+        return sortOperands(call, this::rankingOr);
+      default:
+        return call;
+      }
+    }
+
+    /**
+     * Returns {@code call} with its operands sorted by ascending rank, or
+     * {@code call} itself when the operands are already in that order.
+     */
+    private RexNode sortOperands(RexCall call, Function<RexNode, Double> ranking) {
+      final List<RexNode> operands = call.getOperands();
+      final List<RexNode> sorted = operands.stream()
+          .map(pred -> new Pair<>(pred, ranking.apply(pred)))
+          .sorted(BY_RANK)
+          .map(Pair::getKey)
+          .collect(Collectors.toList());
+      return sorted.equals(operands) ? call : call.clone(call.getType(), sorted);
+    }
+
+    /**
+     * Rank of an AND operand: {@code (selectivity - 1) / cost}. The value is more
+     * negative, and so sorts earlier, the larger (1 - selectivity) is per unit
+     * of cost. Returns {@code null} when selectivity or cost is unknown.
+     */
+    private Double rankingAnd(RexNode e) {
+      final Double selectivity = selectivityEstimator.estimateSelectivity(e);
+      if (selectivity == null) {
+        return null;
+      }
+      final Double costPerTuple = costPerTuple(e);
+      if (costPerTuple == null) {
+        return null;
+      }
+      return (selectivity - 1d) / costPerTuple;
+    }
+
+    /**
+     * Rank of an OR operand: {@code -selectivity / cost}. The value is more
+     * negative, and so sorts earlier, the larger the selectivity is per unit
+     * of cost. Returns {@code null} when selectivity or cost is unknown.
+     */
+    private Double rankingOr(RexNode e) {
+      final Double selectivity = selectivityEstimator.estimateSelectivity(e);
+      if (selectivity == null) {
+        return null;
+      }
+      final Double costPerTuple = costPerTuple(e);
+      if (costPerTuple == null) {
+        return null;
+      }
+      return -selectivity / costPerTuple;
+    }
+
+    private Double costPerTuple(RexNode e) {
+      return e.accept(new RexFunctionCost());
+    }
+
+  }
+
+  /**
+   * The cost of a call expression e is computed as:
+   * cost(e) = functionCost + sum_1..n(byteSize(o_i) + cost(o_i))
+   * with the call having operands i in 1..n.
+   */
+  private static class RexFunctionCost extends RexVisitorImpl<Double> {
+
+    private RexFunctionCost() {
+      super(true);
+    }
+
+    @Override
+    public Double visitCall(RexCall call) {
+      if (!deep) {
+        return null;
+      }
+
+      // A primitive accumulator avoids re-boxing on every addition.
+      double cost = 0d;
+      for (RexNode operand : call.operands) {
+        final Double operandCost = operand.accept(this);
+        if (operandCost == null) {
+          return null;
+        }
+        cost += operandCost;
+        final Double size = HiveRelMdSize.averageTypeSize(operand.getType());
+        if (size == null) {
+          return null;
+        }
+        cost += size;
+      }
+
+      return cost + functionCost(call);
+    }
+
+    /** Fixed per-operator cost, keyed on the kind of the call. */
+    private static double functionCost(RexCall call) {
+      switch (call.getKind()) {
+      case EQUALS:
+      case NOT_EQUALS:
+      case LESS_THAN:
+      case GREATER_THAN:
+      case LESS_THAN_OR_EQUAL:
+      case GREATER_THAN_OR_EQUAL:
+      case IS_NOT_NULL:
+      case IS_NULL:
+      case IS_TRUE:
+      case IS_NOT_TRUE:
+      case IS_FALSE:
+      case IS_NOT_FALSE:
+        return 1d;
+
+      case BETWEEN:
+        return 3d;
+
+      case IN:
+        // Two units for every operand after the first.
+        return 2d * (call.getOperands().size() - 1);
+
+      case AND:
+      case OR:
+        return 1d * call.getOperands().size();
+
+      case CAST:
+        return 8d;
+
+      default:
+        return 32d;
+      }
+    }
+
+    @Override
+    public Double visitInputRef(RexInputRef inputRef) {
+      return 0d;
+    }
+
+    @Override
+    public Double visitLiteral(RexLiteral literal) {
+      return 0d;
+    }
+
+    @Override
+    public Double visitDynamicParam(RexDynamicParam dynamicParam) {
+      return 0d;
+    }
+
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
index d362e9b17d..6afb8bdf5d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -47,7 +47,7 @@
   private final double childCardinality;
   private final RelMetadataQuery mq;
 
-  protected FilterSelectivityEstimator(RelNode childRel, RelMetadataQuery mq) {
+  public FilterSelectivityEstimator(RelNode childRel, RelMetadataQuery mq) {
     super(true);
     this.mq = mq;
     this.childRel = childRel;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
index c1cd34478d..893cb9975c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
@@ -121,6 +121,10 @@ private HiveRelMdSize() {}
   // supports all types
   @Override
   public Double averageTypeValueSize(RelDataType type) {
+    return averageTypeSize(type);
+  }
+
+  public static Double averageTypeSize(RelDataType type) {
     switch (type.getSqlTypeName()) {
     case BOOLEAN:
     case TINYINT:
@@ -163,7 +167,7 @@ public Double averageTypeValueSize(RelDataType type) {
     case ROW:
       Double average = 0.0;
       for (RelDataTypeField field : type.getFieldList()) {
-        average += averageTypeValueSize(field.getType());
+        average += averageTypeSize(field.getType());
       }
       return average;
     default:
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 0796bc8823..394efb4315 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -185,6 +185,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortPredicates;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectMergeRule;
@@ -2001,6 +2002,14 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
         perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: JDBC transformation rules");
       }
 
+      // Order predicates in filter expressions by estimated selectivity and cost
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
+        perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+        calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
+            HepMatchOrder.BOTTOM_UP, HiveFilterSortPredicates.INSTANCE);
+        perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Sort predicates within filter operators");
+      }
+
       // 11. Run rules to aid in translation from Calcite tree to Hive tree
       if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
         perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);