diff --git pom.xml pom.xml index 2337e89..b014b9d 100644 --- pom.xml +++ pom.xml @@ -110,7 +110,7 @@ 0.1 1.7.7 0.8.0.RELEASE - 1.6.0 + 1.7.0 4.2.1 4.1.6 4.1.7 @@ -191,6 +191,19 @@ + calcite + calcite repository + https://repository.apache.org/content/repositories/orgapachecalcite-1017 + default + + true + warn + + + false + + + datanucleus datanucleus maven repository http://www.datanucleus.org/downloads/maven2 diff --git ql/pom.xml ql/pom.xml index ebb9599..0386baa 100644 --- ql/pom.xml +++ ql/pom.xml @@ -371,32 +371,6 @@ - org.apache.calcite - calcite-avatica - ${calcite.version} - - - - org.hsqldb - hsqldb - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - - com.google.guava guava ${guava.version} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index c0609d7..75fb916 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -77,7 +77,7 @@ public RelMetadataProvider getMetadataProvider() { HiveRelMdDistribution.SOURCE, HiveRelMdCollation.SOURCE, HiveRelMdPredicates.SOURCE, - new DefaultRelMetadataProvider())); + DefaultRelMetadataProvider.INSTANCE)); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java new file mode 100644 index 0000000..e731b0c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.plan.Context; +import org.apache.calcite.plan.Contexts; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.server.CalciteServerStatement; +import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; + + +/** + * Builder for relational expressions in Hive. + * + *

{@code RelBuilder} does not make possible anything that you could not + * also accomplish by calling the factory methods of the particular relational + * expression. But it makes common tasks more straightforward and concise. + * + *

It is not thread-safe. + */ +public class HiveRelBuilder extends RelBuilder { + + private HiveRelBuilder(Context context, RelOptCluster cluster, RelOptSchema relOptSchema) { + super(context, cluster, relOptSchema); + } + + /** Creates a RelBuilder. */ + public static RelBuilder create(FrameworkConfig config) { + final RelOptCluster[] clusters = {null}; + final RelOptSchema[] relOptSchemas = {null}; + Frameworks.withPrepare( + new Frameworks.PrepareAction(config) { + public Void apply(RelOptCluster cluster, RelOptSchema relOptSchema, + SchemaPlus rootSchema, CalciteServerStatement statement) { + clusters[0] = cluster; + relOptSchemas[0] = relOptSchema; + return null; + } + }); + return new HiveRelBuilder(config.getContext(), clusters[0], relOptSchemas[0]); + } + + /** Creates a {@link RelBuilderFactory}, a partially-created RelBuilder. + * Just add a {@link RelOptCluster} and a {@link RelOptSchema} */ + public static RelBuilderFactory proto(final Context context) { + return new RelBuilderFactory() { + public RelBuilder create(RelOptCluster cluster, RelOptSchema schema) { + return new HiveRelBuilder(context, cluster, schema); + } + }; + } + + /** Creates a {@link RelBuilderFactory} that uses a given set of factories. */ + public static RelBuilderFactory proto(Object... factories) { + return proto(Contexts.of(factories)); + } + + @Override + public RelBuilder empty() { + final RelNode input = build(); + final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort( + input, RelCollations.of(), null, literal(0)); + return this.push(sort); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index 971b446..cf93ed8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -41,7 +41,6 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.ImmutableBitSet; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -78,7 +77,7 @@ new HiveSetOpFactoryImpl(); public static final RelBuilderFactory HIVE_BUILDER = - RelBuilder.proto( + HiveRelBuilder.proto( Contexts.of(HIVE_PROJECT_FACTORY, HIVE_FILTER_FACTORY, HIVE_JOIN_FACTORY, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java deleted file mode 100644 index 2f309f3..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.calcite; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.calcite.linq4j.Ord; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexLiteral; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.util.Pair; -import org.apache.calcite.util.Util; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; - - -public class HiveRexUtil { - - /** - * Simplifies a boolean expression. - * - *

In particular:

- *
    - *
  • {@code simplify(x = 1 AND y = 2 AND NOT x = 1)} - * returns {@code y = 2}
  • - *
  • {@code simplify(x = 1 AND FALSE)} - * returns {@code FALSE}
  • - *
- */ - public static RexNode simplify(RexBuilder rexBuilder, RexNode e) { - switch (e.getKind()) { - case AND: - return simplifyAnd(rexBuilder, (RexCall) e); - case OR: - return simplifyOr(rexBuilder, (RexCall) e); - case CASE: - return simplifyCase(rexBuilder, (RexCall) e); - case IS_NULL: - return ((RexCall) e).getOperands().get(0).getType().isNullable() - ? e : rexBuilder.makeLiteral(false); - case IS_NOT_NULL: - return ((RexCall) e).getOperands().get(0).getType().isNullable() - ? e : rexBuilder.makeLiteral(true); - default: - return e; - } - } - - private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call) { - final List operands = call.getOperands(); - final List newOperands = new ArrayList<>(); - for (int i = 0; i < operands.size(); i++) { - RexNode operand = operands.get(i); - if (RexUtil.isCasePredicate(call, i)) { - if (operand.isAlwaysTrue()) { - // Predicate is always TRUE. Make value the ELSE and quit. - newOperands.add(operands.get(i + 1)); - break; - } - if (operand.isAlwaysFalse()) { - // Predicate is always FALSE. Skip predicate and value. - ++i; - continue; - } - } - newOperands.add(operand); - } - assert newOperands.size() % 2 == 1; - switch (newOperands.size()) { - case 1: - return rexBuilder.makeCast(call.getType(), newOperands.get(0)); - } - trueFalse: - if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) { - // Optimize CASE where every branch returns constant true or constant - // false: - // CASE - // WHEN p1 THEN TRUE - // WHEN p2 THEN FALSE - // WHEN p3 THEN TRUE - // ELSE FALSE - // END - final List> pairs = - casePairs(rexBuilder, newOperands); - for (Ord> pair : Ord.zip(pairs)) { - if (!pair.e.getValue().isAlwaysTrue() - && !pair.e.getValue().isAlwaysFalse()) { - break trueFalse; - } - } - final List terms = new ArrayList<>(); - final List notTerms = new ArrayList<>(); - for (Ord> pair : Ord.zip(pairs)) { - if (pair.e.getValue().isAlwaysTrue()) { - terms.add(RexUtil.andNot(rexBuilder, pair.e.getKey(), notTerms)); - } else { - notTerms.add(pair.e.getKey()); - } - } - return RexUtil.composeDisjunction(rexBuilder, terms, false); - } - if (newOperands.equals(operands)) { - return call; - } - return call.clone(call.getType(), newOperands); - } - - /** Given "CASE WHEN p1 THEN v1 ... ELSE e END" - * returns [(p1, v1), ..., (true, e)]. */ - private static List> casePairs(RexBuilder rexBuilder, - List operands) { - final ImmutableList.Builder> builder = - ImmutableList.builder(); - for (int i = 0; i < operands.size() - 1; i += 2) { - builder.add(Pair.of(operands.get(i), operands.get(i + 1))); - } - builder.add( - Pair.of((RexNode) rexBuilder.makeLiteral(true), Util.last(operands))); - return builder.build(); - } - - public static RexNode simplifyAnd(RexBuilder rexBuilder, RexCall e) { - final List terms = RelOptUtil.conjunctions(e); - final List notTerms = new ArrayList<>(); - final List negatedTerms = new ArrayList<>(); - final List nullOperands = new ArrayList<>(); - final List notNullOperands = new ArrayList<>(); - final Set comparedOperands = new HashSet<>(); - for (int i = 0; i < terms.size(); i++) { - final RexNode term = terms.get(i); - if (!HiveCalciteUtil.isDeterministic(term)) { - continue; - } - switch (term.getKind()) { - case NOT: - notTerms.add( - ((RexCall) term).getOperands().get(0)); - terms.remove(i); - --i; - break; - case LITERAL: - if (!RexLiteral.booleanValue(term)) { - return term; // false - } else { - terms.remove(i); - --i; - } - break; - case EQUALS: - case NOT_EQUALS: - case LESS_THAN: - case GREATER_THAN: - case LESS_THAN_OR_EQUAL: - case GREATER_THAN_OR_EQUAL: - RexCall call = (RexCall) term; - RexNode left = call.getOperands().get(0); - comparedOperands.add(left); - // if it is a cast, we include the inner reference - if (left.getKind() == SqlKind.CAST) { - RexCall leftCast = (RexCall) left; - comparedOperands.add(leftCast.getOperands().get(0)); - } - RexNode right = call.getOperands().get(1); - comparedOperands.add(right); - // if it is a cast, we include the inner reference - if (right.getKind() == SqlKind.CAST) { - RexCall rightCast = (RexCall) right; - comparedOperands.add(rightCast.getOperands().get(0)); - } - // Assume we have the expression a > 5. - // Then we can derive the negated term: NOT(a <= 5). - // But as the comparison is string based and thus operands order dependent, - // we should also add the inverted negated term: NOT(5 >= a). - // Observe that for creating the inverted term we invert the list of operands. - RexCall negatedTerm = negate(rexBuilder, call); - if (negatedTerm != null) { - negatedTerms.add(negatedTerm); - RexCall invertNegatedTerm = invert(rexBuilder, negatedTerm); - if (invertNegatedTerm != null) { - negatedTerms.add(invertNegatedTerm); - } - } - break; - case IN: - comparedOperands.add(((RexCall) term).operands.get(0)); - break; - case BETWEEN: - comparedOperands.add(((RexCall) term).operands.get(1)); - break; - case IS_NOT_NULL: - notNullOperands.add( - ((RexCall) term).getOperands().get(0)); - terms.remove(i); - --i; - break; - case IS_NULL: - nullOperands.add( - ((RexCall) term).getOperands().get(0)); - } - } - if (terms.isEmpty() && notTerms.isEmpty() && notNullOperands.isEmpty()) { - return rexBuilder.makeLiteral(true); - } - // If one column should be null and is in a comparison predicate, - // it is not satisfiable. - // Example. IS NULL(x) AND x < 5 - not satisfiable - if (!Collections.disjoint(nullOperands, comparedOperands)) { - return rexBuilder.makeLiteral(false); - } - // Remove not necessary IS NOT NULL expressions. - // - // Example. IS NOT NULL(x) AND x < 5 : x < 5 - for (RexNode operand : notNullOperands) { - if (!comparedOperands.contains(operand)) { - terms.add( - rexBuilder.makeCall( - SqlStdOperatorTable.IS_NOT_NULL, operand)); - } - } - // If one of the not-disjunctions is a disjunction that is wholly - // contained in the disjunctions list, the expression is not - // satisfiable. - // - // Example #1. x AND y AND z AND NOT (x AND y) - not satisfiable - // Example #2. x AND y AND NOT (x AND y) - not satisfiable - // Example #3. x AND y AND NOT (x AND y AND z) - may be satisfiable - final Set termsSet = new HashSet( - Lists.transform(terms, HiveCalciteUtil.REX_STR_FN)); - for (RexNode notDisjunction : notTerms) { - final Set notSet = new HashSet( - Lists.transform(RelOptUtil.conjunctions(notDisjunction), HiveCalciteUtil.REX_STR_FN)); - if (termsSet.containsAll(notSet)) { - return rexBuilder.makeLiteral(false); - } - } - // Add the NOT disjunctions back in. - for (RexNode notDisjunction : notTerms) { - terms.add( - rexBuilder.makeCall( - SqlStdOperatorTable.NOT, notDisjunction)); - } - // The negated terms - for (RexNode notDisjunction : negatedTerms) { - final Set notSet = new HashSet( - Lists.transform(RelOptUtil.conjunctions(notDisjunction), HiveCalciteUtil.REX_STR_FN)); - if (termsSet.containsAll(notSet)) { - return rexBuilder.makeLiteral(false); - } - } - return RexUtil.composeConjunction(rexBuilder, terms, false); - } - - /** Simplifies OR(x, x) into x, and similar. */ - public static RexNode simplifyOr(RexBuilder rexBuilder, RexCall call) { - assert call.getKind() == SqlKind.OR; - final List terms = RelOptUtil.disjunctions(call); - for (int i = 0; i < terms.size(); i++) { - final RexNode term = terms.get(i); - switch (term.getKind()) { - case LITERAL: - if (RexLiteral.booleanValue(term)) { - return term; // true - } else { - terms.remove(i); - --i; - } - } - } - return RexUtil.composeDisjunction(rexBuilder, terms, false); - } - - private static RexCall negate(RexBuilder rexBuilder, RexCall call) { - switch (call.getKind()) { - case EQUALS: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, call.getOperands()); - case NOT_EQUALS: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, call.getOperands()); - case LESS_THAN: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, call.getOperands()); - case GREATER_THAN: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, call.getOperands()); - case LESS_THAN_OR_EQUAL: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN, call.getOperands()); - case GREATER_THAN_OR_EQUAL: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, call.getOperands()); - } - return null; - } - - private static RexCall invert(RexBuilder rexBuilder, RexCall call) { - switch (call.getKind()) { - case LESS_THAN: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN, - Lists.reverse(call.getOperands())); - case GREATER_THAN: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, - Lists.reverse(call.getOperands())); - case LESS_THAN_OR_EQUAL: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, - Lists.reverse(call.getOperands())); - case GREATER_THAN_OR_EQUAL: - return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - Lists.reverse(call.getOperands())); - } - return null; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java index ed45ab3..81f8fe8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java @@ -19,7 +19,10 @@ import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.BuiltInMetadata; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdPercentageOriginalRows; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -34,7 +37,7 @@ /** * HiveRelMdCost supplies the implementation of cost model. */ -public class HiveRelMdCost { +public class HiveRelMdCost implements MetadataHandler { private final HiveCostModel hiveCostModel; @@ -50,6 +53,10 @@ public RelMetadataProvider getMetadataProvider() { RelMdPercentageOriginalRows.SOURCE)); } + public MetadataDef getDef() { + return BuiltInMetadata.NonCumulativeCost.DEF; + } + public RelOptCost getNonCumulativeCost(HiveAggregate aggregate, RelMetadataQuery mq) { return hiveCostModel.getAggregateCost(aggregate); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 9006f45..db2c836 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -16,58 +16,14 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Pattern; - -import org.apache.calcite.plan.RelOptPlanner; -import org.apache.calcite.plan.RelOptPredicateList; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.JoinInfo; -import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rel.rules.ValuesReduceRule; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexCorrelVariable; -import org.apache.calcite.rex.RexDynamicParam; -import org.apache.calcite.rex.RexFieldAccess; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexLiteral; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexOver; -import org.apache.calcite.rex.RexRangeRef; -import org.apache.calcite.rex.RexShuttle; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.rex.RexVisitorImpl; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.fun.SqlRowOperator; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.tools.RelBuilderFactory; -import org.apache.calcite.util.Pair; -import org.apache.calcite.util.Stacks; -import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; - /** * Collection of planner rules that apply various simplifying transformations on * RexNode trees. Currently, there are two transformations: @@ -79,178 +35,30 @@ * is the same as the type of the resulting cast expression * */ -public abstract class HiveReduceExpressionsRule extends RelOptRule { +public abstract class HiveReduceExpressionsRule extends ReduceExpressionsRule { //~ Static fields/initializers --------------------------------------------- /** - * Regular expression that matches the description of all instances of this - * rule and {@link ValuesReduceRule} also. Use - * it to prevent the planner from invoking these rules. - */ - public static final Pattern EXCLUSION_PATTERN = - Pattern.compile("Reduce(Expressions|Values)Rule.*"); - - /** * Singleton rule that reduces constants inside a - * {@link org.apache.calcite.rel.logical.HiveFilter}. + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter}. */ - public static final HiveReduceExpressionsRule FILTER_INSTANCE = + public static final ReduceExpressionsRule FILTER_INSTANCE = new FilterReduceExpressionsRule(HiveFilter.class, HiveRelFactories.HIVE_BUILDER); /** * Singleton rule that reduces constants inside a - * {@link org.apache.calcite.rel.logical.HiveProject}. + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject}. */ - public static final HiveReduceExpressionsRule PROJECT_INSTANCE = + public static final ReduceExpressionsRule PROJECT_INSTANCE = new ProjectReduceExpressionsRule(HiveProject.class, HiveRelFactories.HIVE_BUILDER); /** * Singleton rule that reduces constants inside a - * {@link org.apache.calcite.rel.core.HiveJoin}. + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin}. */ - public static final HiveReduceExpressionsRule JOIN_INSTANCE = + public static final ReduceExpressionsRule JOIN_INSTANCE = new JoinReduceExpressionsRule(HiveJoin.class, HiveRelFactories.HIVE_BUILDER); - /** - * Rule that reduces constants inside a {@link org.apache.calcite.rel.core.Filter}. - * If the condition is a constant, the filter is removed (if TRUE) or replaced with - * an empty {@link org.apache.calcite.rel.core.Values} (if FALSE or NULL). - */ - public static class FilterReduceExpressionsRule extends HiveReduceExpressionsRule { - - public FilterReduceExpressionsRule(Class filterClass, - RelBuilderFactory relBuilderFactory) { - super(filterClass, relBuilderFactory, "HiveReduceExpressionsRule(Filter)"); - } - - @Override public void onMatch(RelOptRuleCall call) { - final Filter filter = call.rel(0); - final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); - - RexNode newConditionExp = HiveRexUtil.simplify(rexBuilder, filter.getCondition()); - final List expList = Lists.newArrayList(newConditionExp); - boolean reduced = false; - final RelOptPredicateList predicates = - RelMetadataQuery.instance().getPulledUpPredicates(filter.getInput()); - if (reduceExpressions(filter, expList, predicates)) { - assert expList.size() == 1; - newConditionExp = expList.get(0); - reduced = true; - } - - // Even if no reduction, let's still test the original - // predicate to see if it was already a constant, - // in which case we don't need any runtime decision - // about filtering. - // TODO: support LogicalValues - if (newConditionExp.isAlwaysTrue()) { - call.transformTo( - filter.getInput()); - } else if (reduced - || !newConditionExp.toString().equals(filter.getCondition().toString())) { - call.transformTo(call.builder(). - push(filter.getInput()).filter(newConditionExp).build()); - } else { - return; - } - - // New plan is absolutely better than old plan. - call.getPlanner().setImportance(filter, 0.0); - } - } - - /** - * Rule that reduces constants inside a {@link org.apache.calcite.rel.core.Project}. - */ - public static class ProjectReduceExpressionsRule extends HiveReduceExpressionsRule { - - public ProjectReduceExpressionsRule(Class projectClass, - RelBuilderFactory relBuilderFactory) { - super(projectClass, relBuilderFactory, "HiveReduceExpressionsRule(Project)"); - } - - public boolean matches(RelOptRuleCall call) { - Project project = call.rel(0); - HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); - - // If this operator has been visited already by the rule, - // we do not need to apply the optimization - if (registry != null && registry.getVisited(this).contains(project)) { - return false; - } - - return true; - } - - @Override public void onMatch(RelOptRuleCall call) { - Project project = call.rel(0); - // Register that we have visited this operator in this rule - HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); - if (registry != null) { - registry.registerVisited(this, project); - } - final RelOptPredicateList predicates = - RelMetadataQuery.instance().getPulledUpPredicates(project.getInput()); - final List expList = - Lists.newArrayList(project.getProjects()); - if (reduceExpressions(project, expList, predicates)) { - RelNode newProject = call.builder().push(project.getInput()) - .project(expList, project.getRowType().getFieldNames()).build(); - if (registry != null) { - registry.registerVisited(this, newProject); - } - call.transformTo(newProject); - - // New plan is absolutely better than old plan. - call.getPlanner().setImportance(project, 0.0); - } - } - } - - /** - * Rule that reduces constants inside a {@link org.apache.calcite.rel.core.HiveJoin}. - */ - public static class JoinReduceExpressionsRule extends HiveReduceExpressionsRule { - - public JoinReduceExpressionsRule(Class joinClass, - RelBuilderFactory relBuilderFactory) { - super(joinClass, relBuilderFactory, "HiveReduceExpressionsRule(HiveJoin)"); - } - - @Override public void onMatch(RelOptRuleCall call) { - final HiveJoin join = call.rel(0); - final List expList = Lists.newArrayList(join.getCondition()); - final int fieldCount = join.getLeft().getRowType().getFieldCount(); - RelMetadataQuery mq = RelMetadataQuery.instance(); - final RelOptPredicateList leftPredicates = - mq.getPulledUpPredicates(join.getLeft()); - final RelOptPredicateList rightPredicates = - mq.getPulledUpPredicates(join.getRight()); - final RelOptPredicateList predicates = - leftPredicates.union(rightPredicates.shift(fieldCount)); - if (!reduceExpressions(join, expList, predicates)) { - return; - } - final JoinInfo joinInfo = JoinInfo.of(join.getLeft(), join.getRight(), expList.get(0)); - if (!joinInfo.isEqui()) { - // This kind of join must be an equi-join, and the condition is - // no longer an equi-join. SemiJoin is an example of this. - return; - } - call.transformTo( - join.copy( - join.getTraitSet(), - expList.get(0), - join.getLeft(), - join.getRight(), - join.getJoinType(), - join.isSemiJoinDone())); - - // New plan is absolutely better than old plan. - call.getPlanner().setImportance(join, 0.0); - } - } - //~ Constructors ----------------------------------------------------------- /** @@ -259,596 +67,10 @@ public JoinReduceExpressionsRule(Class joinClass, * @param clazz class of rels to which this rule should apply */ protected HiveReduceExpressionsRule(Class clazz, - RelBuilderFactory relBuilderFactory, String desc) { - super(operand(clazz, any()), relBuilderFactory, desc); - } - - //~ Methods ---------------------------------------------------------------- - - /** - * Reduces a list of expressions. - * - * @param rel Relational expression - * @param expList List of expressions, modified in place - * @param predicates Constraints known to hold on input expressions - * @return whether reduction found something to change, and succeeded - */ - protected static boolean reduceExpressions(RelNode rel, List expList, - RelOptPredicateList predicates) { - RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); - - // Replace predicates on CASE to CASE on predicates. - new CaseShuttle().mutate(expList); - - // Find reducible expressions. - final List constExps = Lists.newArrayList(); - List addCasts = Lists.newArrayList(); - final List removableCasts = Lists.newArrayList(); - final ImmutableMap constants = - predicateConstants(predicates); - findReducibleExps(rel.getCluster().getTypeFactory(), expList, constants, - constExps, addCasts, removableCasts); - if (constExps.isEmpty() && removableCasts.isEmpty()) { - return false; - } - - // Remove redundant casts before reducing constant expressions. - // If the argument to the redundant cast is a reducible constant, - // reducing that argument to a constant first will result in not being - // able to locate the original cast expression. - if (!removableCasts.isEmpty()) { - final List reducedExprs = Lists.newArrayList(); - for (RexNode exp : removableCasts) { - RexCall call = (RexCall) exp; - reducedExprs.add(call.getOperands().get(0)); - } - RexReplacer replacer = - new RexReplacer( - rexBuilder, - removableCasts, - reducedExprs, - Collections.nCopies(removableCasts.size(), false)); - replacer.mutate(expList); - } - - if (constExps.isEmpty()) { - return true; - } - - final List constExps2 = Lists.newArrayList(constExps); - if (!constants.isEmpty()) { - //noinspection unchecked - final List> pairs = - (List>) (List) - Lists.newArrayList(constants.entrySet()); - RexReplacer replacer = - new RexReplacer( - rexBuilder, - Pair.left(pairs), - Pair.right(pairs), - Collections.nCopies(pairs.size(), false)); - replacer.mutate(constExps2); - } - - // Compute the values they reduce to. - RelOptPlanner.Executor executor = - rel.getCluster().getPlanner().getExecutor(); - if (executor == null) { - // Cannot reduce expressions: caller has not set an executor in their - // environment. Caller should execute something like the following before - // invoking the planner: - // - // final RexExecutorImpl executor = - // new RexExecutorImpl(Schemas.createDataContext(null)); - // rootRel.getCluster().getPlanner().setExecutor(executor); - return false; - } - - final List reducedValues = Lists.newArrayList(); - executor.reduce(rexBuilder, constExps2, reducedValues); - - // For Project, we have to be sure to preserve the result - // types, so always cast regardless of the expression type. - // For other RelNodes like Filter, in general, this isn't necessary, - // and the presence of casts could hinder other rules such as sarg - // analysis, which require bare literals. But there are special cases, - // like when the expression is a UDR argument, that need to be - // handled as special cases. - if (rel instanceof Project) { - addCasts = Collections.nCopies(reducedValues.size(), true); - } - - RexReplacer replacer = - new RexReplacer( - rexBuilder, - constExps, - reducedValues, - addCasts); - replacer.mutate(expList); - return true; - } - - /** - * Locates expressions that can be reduced to literals or converted to - * expressions with redundant casts removed. - * - * @param typeFactory Type factory - * @param exps list of candidate expressions to be examined for - * reduction - * @param constants List of expressions known to be constant - * @param constExps returns the list of expressions that can be constant - * reduced - * @param addCasts indicator for each expression that can be constant - * reduced, whether a cast of the resulting reduced - * expression is potentially necessary - * @param removableCasts returns the list of cast expressions where the cast - */ - protected static void findReducibleExps(RelDataTypeFactory typeFactory, - List exps, ImmutableMap constants, - List constExps, List addCasts, - List removableCasts) { - ReducibleExprLocator gardener = - new ReducibleExprLocator(typeFactory, constants, constExps, - addCasts, removableCasts); - for (RexNode exp : exps) { - gardener.analyze(exp); - } - assert constExps.size() == addCasts.size(); - } - - protected static ImmutableMap predicateConstants( - RelOptPredicateList predicates) { - // We cannot use an ImmutableMap.Builder here. If there are multiple entries - // with the same key (e.g. "WHERE deptno = 1 AND deptno = 2"), it doesn't - // matter which we take, so the latter will replace the former. - // The basic idea is to find all the pairs of RexNode = RexLiteral - // (1) If 'predicates' contain a non-EQUALS, we bail out. - // (2) It is OK if a RexNode is equal to the same RexLiteral several times, - // (e.g. "WHERE deptno = 1 AND deptno = 1") - // (3) It will return false if there are inconsistent constraints (e.g. - // "WHERE deptno = 1 AND deptno = 2") - final Map map = new HashMap<>(); - final Set excludeSet = new HashSet<>(); - for (RexNode predicate : predicates.pulledUpPredicates) { - gatherConstraints(map, predicate, excludeSet); - } - final ImmutableMap.Builder builder = - ImmutableMap.builder(); - for (Map.Entry entry : map.entrySet()) { - RexNode rexNode = entry.getKey(); - if (!overlap(rexNode, excludeSet)) { - builder.put(rexNode, entry.getValue()); - } - } - return builder.build(); - } - - private static boolean overlap(RexNode rexNode, Set set) { - if (rexNode instanceof RexCall) { - for (RexNode r : ((RexCall) rexNode).getOperands()) { - if (overlap(r, set)) { - return true; - } - } - return false; - } else { - return set.contains(rexNode); - } - } - - /** Tries to decompose the RexNode which is a RexCall into non-literal - * RexNodes. */ - private static void decompose(Set set, RexNode rexNode) { - if (rexNode instanceof RexCall) { - for (RexNode r : ((RexCall) rexNode).getOperands()) { - decompose(set, r); - } - } else if (!(rexNode instanceof RexLiteral)) { - set.add(rexNode); - } + RelBuilderFactory relBuilderFactory, String desc) { + super(clazz, relBuilderFactory, desc); } - private static void gatherConstraints(Map map, - RexNode predicate, Set excludeSet) { - if (predicate.getKind() != SqlKind.EQUALS) { - decompose(excludeSet, predicate); - return; - } - final List operands = ((RexCall) predicate).getOperands(); - if (operands.size() != 2) { - decompose(excludeSet, predicate); - return; - } - // if it reaches here, we have rexNode equals rexNode - final RexNode left = operands.get(0); - final RexNode right = operands.get(1); - // note that literals are immutable too and they can only be compared through - // values. - if (right instanceof RexLiteral && !excludeSet.contains(left)) { - RexLiteral existedValue = map.get(left); - if (existedValue == null) { - map.put(left, (RexLiteral) right); - } else { - if (!existedValue.getValue().equals(((RexLiteral) right).getValue())) { - // we found conflict values. - map.remove(left); - excludeSet.add(left); - } - } - } else if (left instanceof RexLiteral && !excludeSet.contains(right)) { - RexLiteral existedValue = map.get(right); - if (existedValue == null) { - map.put(right, (RexLiteral) left); - } else { - if (!existedValue.getValue().equals(((RexLiteral) left).getValue())) { - map.remove(right); - excludeSet.add(right); - } - } - } - } - - /** Pushes predicates into a CASE. - * - *

We have a loose definition of 'predicate': any boolean expression will - * do, except CASE. For example '(CASE ...) = 5' or '(CASE ...) IS NULL'. - */ - protected static RexCall pushPredicateIntoCase(RexCall call) { - if (call.getType().getSqlTypeName() != SqlTypeName.BOOLEAN) { - return call; - } - switch (call.getKind()) { - case CASE: - case AND: - case OR: - return call; // don't push CASE into CASE! - } - int caseOrdinal = -1; - final List operands = call.getOperands(); - for (int i = 0; i < operands.size(); i++) { - RexNode operand = operands.get(i); - switch (operand.getKind()) { - case CASE: - caseOrdinal = i; - } - } - if (caseOrdinal < 0) { - return call; - } - // Convert - // f(CASE WHEN p1 THEN v1 ... END, arg) - // to - // CASE WHEN p1 THEN f(v1, arg) ... END - final RexCall case_ = (RexCall) operands.get(caseOrdinal); - final List nodes = new ArrayList<>(); - for (int i = 0; i < case_.getOperands().size(); i++) { - RexNode node = case_.getOperands().get(i); - if (!RexUtil.isCasePredicate(case_, i)) { - node = substitute(call, caseOrdinal, node); - } - nodes.add(node); - } - return case_.clone(call.getType(), nodes); - } - - /** Converts op(arg0, ..., argOrdinal, ..., argN) to op(arg0,..., node, ..., argN). */ - protected static RexNode substitute(RexCall call, int ordinal, RexNode node) { - final List newOperands = Lists.newArrayList(call.getOperands()); - newOperands.set(ordinal, node); - return call.clone(call.getType(), newOperands); - } - - //~ Inner Classes ---------------------------------------------------------- - - /** - * Replaces expressions with their reductions. Note that we only have to - * look for RexCall, since nothing else is reducible in the first place. - */ - protected static class RexReplacer extends RexShuttle { - private final RexBuilder rexBuilder; - private final List reducibleExps; - private final List reducedValues; - private final List addCasts; - - RexReplacer( - RexBuilder rexBuilder, - List reducibleExps, - List reducedValues, - List addCasts) { - this.rexBuilder = rexBuilder; - this.reducibleExps = reducibleExps; - this.reducedValues = reducedValues; - this.addCasts = addCasts; - } - - @Override public RexNode visitInputRef(RexInputRef inputRef) { - RexNode node = visit(inputRef); - if (node == null) { - return super.visitInputRef(inputRef); - } - return node; - } - - @Override public RexNode visitCall(RexCall call) { - RexNode node = visit(call); - if (node != null) { - return node; - } - node = super.visitCall(call); - if (node != call) { - node = HiveRexUtil.simplify(rexBuilder, node); - } - return node; - } - - private RexNode visit(final RexNode call) { - int i = reducibleExps.indexOf(call); - if (i == -1) { - return null; - } - RexNode replacement = reducedValues.get(i); - if (addCasts.get(i) - && (replacement.getType() != call.getType())) { - // Handle change from nullable to NOT NULL by claiming - // that the result is still nullable, even though - // we know it isn't. - // - // Also, we cannot reduce CAST('abc' AS VARCHAR(4)) to 'abc'. - // If we make 'abc' of type VARCHAR(4), we may later encounter - // the same expression in a Project's digest where it has - // type VARCHAR(3), and that's wrong. - replacement = rexBuilder.makeAbstractCast(call.getType(), replacement); - } - return replacement; - } - } - - /** - * Helper class used to locate expressions that either can be reduced to - * literals or contain redundant casts. - */ - protected static class ReducibleExprLocator extends RexVisitorImpl { - /** Whether an expression is constant, and if so, whether it can be - * reduced to a simpler constant. */ - enum Constancy { - NON_CONSTANT, REDUCIBLE_CONSTANT, IRREDUCIBLE_CONSTANT - } - - private final RelDataTypeFactory typeFactory; - - private final List stack; - - private final ImmutableMap constants; - - private final List constExprs; - - private final List addCasts; - - private final List removableCasts; - - private final List parentCallTypeStack; - - ReducibleExprLocator(RelDataTypeFactory typeFactory, - ImmutableMap constants, List constExprs, - List addCasts, List removableCasts) { - // go deep - super(true); - this.typeFactory = typeFactory; - this.constants = constants; - this.constExprs = constExprs; - this.addCasts = addCasts; - this.removableCasts = removableCasts; - this.stack = Lists.newArrayList(); - this.parentCallTypeStack = Lists.newArrayList(); - } - - public void analyze(RexNode exp) { - assert stack.isEmpty(); - - exp.accept(this); - - // Deal with top of stack - assert stack.size() == 1; - assert parentCallTypeStack.isEmpty(); - Constancy rootConstancy = stack.get(0); - if (rootConstancy == Constancy.REDUCIBLE_CONSTANT) { - // The entire subtree was constant, so add it to the result. - addResult(exp); - } - stack.clear(); - } - - private Void pushVariable() { - stack.add(Constancy.NON_CONSTANT); - return null; - } - - private void addResult(RexNode exp) { - // Cast of literal can't be reduced, so skip those (otherwise we'd - // go into an infinite loop as we add them back). - if (exp.getKind() == SqlKind.CAST) { - RexCall cast = (RexCall) exp; - RexNode operand = cast.getOperands().get(0); - if (operand instanceof RexLiteral) { - return; - } - } - constExprs.add(exp); - - // In the case where the expression corresponds to a UDR argument, - // we need to preserve casts. Note that this only applies to - // the topmost argument, not expressions nested within the UDR - // call. - // - // REVIEW zfong 6/13/08 - Are there other expressions where we - // also need to preserve casts? - if (parentCallTypeStack.isEmpty()) { - addCasts.add(false); - } else { - addCasts.add(isUdf(Stacks.peek(parentCallTypeStack))); - } - } - - private Boolean isUdf(SqlOperator operator) { - // return operator instanceof UserDefinedRoutine - return false; - } - - public Void visitInputRef(RexInputRef inputRef) { - if (constants.containsKey(inputRef)) { - stack.add(Constancy.REDUCIBLE_CONSTANT); - return null; - } - return pushVariable(); - } - - public Void visitLiteral(RexLiteral literal) { - stack.add(Constancy.IRREDUCIBLE_CONSTANT); - return null; - } - - public Void visitOver(RexOver over) { - // assume non-constant (running SUM(1) looks constant but isn't) - analyzeCall(over, Constancy.NON_CONSTANT); - return null; - } - - public Void visitCorrelVariable(RexCorrelVariable correlVariable) { - return pushVariable(); - } - - public Void visitCall(RexCall call) { - // assume REDUCIBLE_CONSTANT until proven otherwise - analyzeCall(call, Constancy.REDUCIBLE_CONSTANT); - return null; - } - - private void analyzeCall(RexCall call, Constancy callConstancy) { - Stacks.push(parentCallTypeStack, call.getOperator()); - - // visit operands, pushing their states onto stack - super.visitCall(call); - - // look for NON_CONSTANT operands - int operandCount = call.getOperands().size(); - List operandStack = Util.last(stack, operandCount); - for (Constancy operandConstancy : operandStack) { - if (operandConstancy == Constancy.NON_CONSTANT) { - callConstancy = Constancy.NON_CONSTANT; - } - } - - // Even if all operands are constant, the call itself may - // be non-deterministic. - if (!call.getOperator().isDeterministic()) { - callConstancy = Constancy.NON_CONSTANT; - } else if (call.getOperator().isDynamicFunction()) { - // We can reduce the call to a constant, but we can't - // cache the plan if the function is dynamic. - // For now, treat it same as non-deterministic. - callConstancy = Constancy.NON_CONSTANT; - } - - // Row operator itself can't be reduced to a literal, but if - // the operands are constants, we still want to reduce those - if ((callConstancy == Constancy.REDUCIBLE_CONSTANT) - && (call.getOperator() instanceof SqlRowOperator)) { - callConstancy = Constancy.NON_CONSTANT; - } - - if (callConstancy == Constancy.NON_CONSTANT) { - // any REDUCIBLE_CONSTANT children are now known to be maximal - // reducible subtrees, so they can be added to the result - // list - for (int iOperand = 0; iOperand < operandCount; ++iOperand) { - Constancy constancy = operandStack.get(iOperand); - if (constancy == Constancy.REDUCIBLE_CONSTANT) { - addResult(call.getOperands().get(iOperand)); - } - } - - // if this cast expression can't be reduced to a literal, - // then see if we can remove the cast - if (call.getOperator() == SqlStdOperatorTable.CAST) { - reduceCasts(call); - } - } - - // pop operands off of the stack - operandStack.clear(); - - // pop this parent call operator off the stack - Stacks.pop(parentCallTypeStack, call.getOperator()); - - // push constancy result for this call onto stack - stack.add(callConstancy); - } - - private void reduceCasts(RexCall outerCast) { - List operands = outerCast.getOperands(); - if (operands.size() != 1) { - return; - } - RelDataType outerCastType = outerCast.getType(); - RelDataType operandType = operands.get(0).getType(); - if (operandType.equals(outerCastType)) { - removableCasts.add(outerCast); - return; - } - - // See if the reduction - // CAST((CAST x AS type) AS type NOT NULL) - // -> CAST(x AS type NOT NULL) - // applies. TODO jvs 15-Dec-2008: consider - // similar cases for precision changes. - if (!(operands.get(0) instanceof RexCall)) { - return; - } - RexCall innerCast = (RexCall) operands.get(0); - if (innerCast.getOperator() != SqlStdOperatorTable.CAST) { - return; - } - if (innerCast.getOperands().size() != 1) { - return; - } - RelDataType outerTypeNullable = - typeFactory.createTypeWithNullability(outerCastType, true); - RelDataType innerTypeNullable = - typeFactory.createTypeWithNullability(operandType, true); - if (outerTypeNullable != innerTypeNullable) { - return; - } - if (operandType.isNullable()) { - removableCasts.add(innerCast); - } - } - - public Void visitDynamicParam(RexDynamicParam dynamicParam) { - return pushVariable(); - } - - public Void visitRangeRef(RexRangeRef rangeRef) { - return pushVariable(); - } - - public Void visitFieldAccess(RexFieldAccess fieldAccess) { - return pushVariable(); - } - } - - /** Shuttle that pushes predicates into a CASE. */ - protected static class CaseShuttle extends RexShuttle { - @Override public RexNode visitCall(RexCall call) { - for (;;) { - call = (RexCall) super.visitCall(call); - final RexCall old = call; - call = pushPredicateIntoCase(call); - if (call == old) { - return call; - } - } - } - } } // End HiveReduceExpressionsRule.java diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index 03002cc..40e04fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -25,34 +25,18 @@ import java.util.Set; import org.apache.calcite.linq4j.Ord; -import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelCollations; -import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexCorrelVariable; -import org.apache.calcite.rex.RexFieldAccess; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexPermuteInputsShuttle; -import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.sql.validate.SqlValidator; -import org.apache.calcite.sql2rel.CorrelationReferenceFinder; import org.apache.calcite.sql2rel.RelFieldTrimmer; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.Util; import org.apache.calcite.util.mapping.IntPair; import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.MappingType; @@ -62,31 +46,24 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; -import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; public class HiveRelFieldTrimmer extends RelFieldTrimmer { protected static final Log LOG = LogFactory.getLog(HiveRelFieldTrimmer.class); - private RelBuilder relBuilder; - private ColumnAccessInfo columnAccessInfo; - private Map viewProjectToTableSchema; public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) { super(validator, relBuilder); - this.relBuilder = relBuilder; } public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, ColumnAccessInfo columnAccessInfo, Map viewToTableSchema) { super(validator, relBuilder); - this.relBuilder = relBuilder; this.columnAccessInfo = columnAccessInfo; this.viewProjectToTableSchema = viewToTableSchema; } @@ -193,190 +170,6 @@ public TrimResult trimFields( /** * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for - * {@link org.apache.calcite.rel.core.Sort}. - */ - public TrimResult trimFields( - HiveSortLimit sort, - ImmutableBitSet fieldsUsed, - Set extraFields) { - final RelDataType rowType = sort.getRowType(); - final int fieldCount = rowType.getFieldCount(); - final RelCollation collation = sort.getCollation(); - final RelNode input = sort.getInput(); - RelOptCluster cluster = sort.getCluster(); - - // We use the fields used by the consumer, plus any fields used as sort - // keys. - final ImmutableBitSet.Builder inputFieldsUsed = - ImmutableBitSet.builder(fieldsUsed); - for (RelFieldCollation field : collation.getFieldCollations()) { - inputFieldsUsed.set(field.getFieldIndex()); - } - - // Create input with trimmed columns. - final Set inputExtraFields = Collections.emptySet(); - TrimResult trimResult = - trimChild(sort, input, inputFieldsUsed.build(), inputExtraFields); - RelNode newInput = trimResult.left; - final Mapping inputMapping = trimResult.right; - - // If the input is unchanged, and we need to project all columns, - // there's nothing we can do. - if (newInput == input - && inputMapping.isIdentity() - && fieldsUsed.cardinality() == fieldCount) { - return result(sort, Mappings.createIdentity(fieldCount)); - } - - relBuilder.push(newInput); - final int offset = - sort.offset == null ? 0 : RexLiteral.intValue(sort.offset); - final int fetch = - sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch); - final ImmutableList fields = - relBuilder.fields(RexUtil.apply(inputMapping, collation)); - - // The result has the same mapping as the input gave us. Sometimes we - // return fields that the consumer didn't ask for, because the filter - // needs them for its condition. - // TODO: Calcite will return empty LogicalValues when offset == 0 && fetch == 0. - // However, Hive ASTConverter can not deal with LogicalValues. - sortLimit(cluster, relBuilder, offset, fetch, fields); - return result(relBuilder.build(), inputMapping); - } - - private List projects(RelDataType inputRowType, RelOptCluster cluster) { - final List exprList = new ArrayList<>(); - for (RelDataTypeField field : inputRowType.getFieldList()) { - final RexBuilder rexBuilder = cluster.getRexBuilder(); - exprList.add(rexBuilder.makeInputRef(field.getType(), field.getIndex())); - } - return exprList; - } - - private static RelFieldCollation collation(RexNode node, - RelFieldCollation.Direction direction, - RelFieldCollation.NullDirection nullDirection, List extraNodes) { - switch (node.getKind()) { - case INPUT_REF: - return new RelFieldCollation(((RexInputRef) node).getIndex(), direction, - Util.first(nullDirection, direction.defaultNullDirection())); - case DESCENDING: - return collation(((RexCall) node).getOperands().get(0), - RelFieldCollation.Direction.DESCENDING, - nullDirection, extraNodes); - case NULLS_FIRST: - return collation(((RexCall) node).getOperands().get(0), direction, - RelFieldCollation.NullDirection.FIRST, extraNodes); - case NULLS_LAST: - return collation(((RexCall) node).getOperands().get(0), direction, - RelFieldCollation.NullDirection.LAST, extraNodes); - default: - final int fieldIndex = extraNodes.size(); - extraNodes.add(node); - return new RelFieldCollation(fieldIndex, direction, - Util.first(nullDirection, direction.defaultNullDirection())); - } - } - - private void sortLimit(RelOptCluster cluster, RelBuilder relBuilder, int offset, int fetch, - Iterable nodes) { - final List fieldCollations = new ArrayList<>(); - final RelDataType inputRowType = relBuilder.peek().getRowType(); - final List extraNodes = projects(inputRowType, cluster); - final List originalExtraNodes = ImmutableList.copyOf(extraNodes); - for (RexNode node : nodes) { - fieldCollations.add( - collation(node, RelFieldCollation.Direction.ASCENDING, - RelFieldCollation.NullDirection.FIRST, extraNodes)); - } - final RexNode offsetNode = offset <= 0 ? null : relBuilder.literal(offset); - final RexNode fetchNode = fetch < 0 ? null : relBuilder.literal(fetch); - if (offsetNode == null && fetchNode == null && fieldCollations.isEmpty()) { - return; // sort is trivial - } - - final boolean addedFields = extraNodes.size() > originalExtraNodes.size(); - if (fieldCollations.isEmpty()) { - assert !addedFields; - RelNode top = relBuilder.peek(); - if (top instanceof Sort) { - final Sort sort2 = (Sort) top; - if (sort2.offset == null && sort2.fetch == null) { - relBuilder.build(); - relBuilder.push(sort2.getInput()); - final RelNode sort = - HiveSortLimit.create(relBuilder.build(), sort2.collation, - offsetNode, fetchNode); - relBuilder.push(sort); - return; - } - } - if (top instanceof Project) { - final Project project = (Project) top; - if (project.getInput() instanceof Sort) { - final Sort sort2 = (Sort) project.getInput(); - if (sort2.offset == null && sort2.fetch == null) { - relBuilder.build(); - relBuilder.push(sort2.getInput()); - final RelNode sort = - HiveSortLimit.create(relBuilder.build(), sort2.collation, - offsetNode, fetchNode); - relBuilder.push(sort); - relBuilder.project(project.getProjects()); - return; - } - } - } - } - if (addedFields) { - relBuilder.project(extraNodes); - } - final RelNode sort = - HiveSortLimit.create(relBuilder.build(), RelCollations.of(fieldCollations), - offsetNode, fetchNode); - relBuilder.push(sort); - if (addedFields) { - relBuilder.project(originalExtraNodes); - } - return; - } - - private TrimResult result(RelNode r, final Mapping mapping) { - final RexBuilder rexBuilder = relBuilder.getRexBuilder(); - for (final CorrelationId correlation : r.getVariablesSet()) { - r = r.accept( - new CorrelationReferenceFinder() { - @Override - protected RexNode handle(RexFieldAccess fieldAccess) { - final RexCorrelVariable v = - (RexCorrelVariable) fieldAccess.getReferenceExpr(); - if (v.id.equals(correlation) - && v.getType().getFieldCount() == mapping.getSourceCount()) { - final int old = fieldAccess.getField().getIndex(); - final int new_ = mapping.getTarget(old); - final RelDataTypeFactory.FieldInfoBuilder typeBuilder = - relBuilder.getTypeFactory().builder(); - for (int target : Util.range(mapping.getTargetCount())) { - typeBuilder.add( - v.getType().getFieldList().get(mapping.getSource(target))); - } - final RexNode newV = - rexBuilder.makeCorrel(typeBuilder.build(), v.id); - if (old != new_) { - return rexBuilder.makeFieldAccess(newV, new_); - } - } - return fieldAccess; - } - - }); - } - return new TrimResult(r, mapping); - } - - /** - * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for * {@link org.apache.calcite.rel.logical.LogicalProject}. */ public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java index 18fe650..f5b3f32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java @@ -20,7 +20,10 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.metadata.BuiltInMetadata; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdCollation; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -32,13 +35,15 @@ import com.google.common.collect.ImmutableList; -public class HiveRelMdCollation { +public class HiveRelMdCollation implements MetadataHandler { + + private static final HiveRelMdCollation INSTANCE = new HiveRelMdCollation(); public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider.of( ImmutableList.of( ReflectiveRelMetadataProvider.reflectiveSource( - BuiltInMethod.COLLATIONS.method, new HiveRelMdCollation()), + BuiltInMethod.COLLATIONS.method, INSTANCE), RelMdCollation.SOURCE)); //~ Constructors ----------------------------------------------------------- @@ -47,6 +52,10 @@ private HiveRelMdCollation() {} //~ Methods ---------------------------------------------------------------- + public MetadataDef getDef() { + return BuiltInMetadata.Collation.DEF; + } + public ImmutableList collations(HiveAggregate aggregate, RelMetadataQuery mq) { // Compute collations ImmutableList.Builder collationListBuilder = diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java index 62d3ead..6670aef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java @@ -18,9 +18,11 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import org.apache.calcite.rel.RelDistribution; +import org.apache.calcite.rel.metadata.BuiltInMetadata; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMdDistribution; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.BuiltInMethod; @@ -30,13 +32,15 @@ import com.google.common.collect.ImmutableList; -public class HiveRelMdDistribution { +public class HiveRelMdDistribution implements MetadataHandler { + + private static final HiveRelMdDistribution INSTANCE = new HiveRelMdDistribution(); public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider.of( ImmutableList.of( ReflectiveRelMetadataProvider.reflectiveSource( - BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()))); + BuiltInMethod.DISTRIBUTION.method, INSTANCE))); //~ Constructors ----------------------------------------------------------- @@ -44,6 +48,10 @@ private HiveRelMdDistribution() {} //~ Methods ---------------------------------------------------------------- + public MetadataDef getDef() { + return BuiltInMetadata.Distribution.DEF; + } + public RelDistribution distribution(HiveAggregate aggregate, RelMetadataQuery mq) { return new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, aggregate.getGroupSet().asList()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java index 0718150..b131248 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -30,9 +30,9 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.BuiltInMetadata; -import org.apache.calcite.rel.metadata.Metadata; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMdUniqueKeys; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexInputRef; @@ -43,13 +43,17 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.plan.ColStatistics; -import com.google.common.base.Function; +public class HiveRelMdUniqueKeys implements MetadataHandler { -public class HiveRelMdUniqueKeys { + private static final HiveRelMdUniqueKeys INSTANCE = new HiveRelMdUniqueKeys(); - public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider - .reflectiveSource(BuiltInMethod.UNIQUE_KEYS.method, - new HiveRelMdUniqueKeys()); + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.UNIQUE_KEYS.method, INSTANCE); + + + public MetadataDef getDef() { + return BuiltInMetadata.UniqueKeys.DEF; + } /* * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col)