From b09ae09a2ccd52113e37ecf18457f3f486babda2 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 27 Aug 2015 19:56:42 -0700 Subject: [PATCH] HIVE-11678 : Add AggregateProjectMergeRule --- .../rules/HiveAggregateProjectMergeRule.java | 151 +++++++++++++++++++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 2 + 2 files changed, 153 insertions(+) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java new file mode 100644 index 0000000..53f04ee --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateProjectMergeRule.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Planner rule that recognizes a {@link HiveAggregate} + * on top of a {@link HiveProject} and if possible + * aggregate through the project or removes the project. + * + *

This is only possible when the grouping expressions and arguments to + * the aggregate functions are field references (i.e. not expressions). + * + *

In some cases, this rule has the effect of trimming: the aggregate will + * use fewer columns than the project did. + */ +public class HiveAggregateProjectMergeRule extends RelOptRule { + public static final HiveAggregateProjectMergeRule INSTANCE = + new HiveAggregateProjectMergeRule(); + + /** Private constructor. */ + private HiveAggregateProjectMergeRule() { + super( + operand(HiveAggregate.class, + operand(HiveProject.class, any()))); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HiveAggregate aggregate = call.rel(0); + final HiveProject project = call.rel(1); + RelNode x = apply(aggregate, project); + if (x != null) { + call.transformTo(x); + } + } + + public static RelNode apply(HiveAggregate aggregate, + HiveProject project) { + final List newKeys = Lists.newArrayList(); + final Map map = new HashMap<>(); + for (int key : aggregate.getGroupSet()) { + final RexNode rex = project.getProjects().get(key); + if (rex instanceof RexInputRef) { + final int newKey = ((RexInputRef) rex).getIndex(); + newKeys.add(newKey); + map.put(key, newKey); + } else { + // Cannot handle "GROUP BY expression" + return null; + } + } + + final ImmutableBitSet newGroupSet = aggregate.getGroupSet().permute(map); + ImmutableList newGroupingSets = null; + if (aggregate.indicator) { + newGroupingSets = + ImmutableBitSet.ORDERING.immutableSortedCopy( + ImmutableBitSet.permute(aggregate.getGroupSets(), map)); + } + + final ImmutableList.Builder aggCalls = + ImmutableList.builder(); + for (AggregateCall aggregateCall : aggregate.getAggCallList()) { + final ImmutableList.Builder newArgs = ImmutableList.builder(); + for (int arg : aggregateCall.getArgList()) { + final RexNode rex = project.getProjects().get(arg); + if (rex instanceof RexInputRef) { + newArgs.add(((RexInputRef) rex).getIndex()); + } else { + // Cannot handle "AGG(expression)" + return null; + } + } + final int newFilterArg; + if (aggregateCall.filterArg >= 0) { + final RexNode rex = project.getProjects().get(aggregateCall.filterArg); + if (!(rex instanceof RexInputRef)) { + return null; + } + newFilterArg = ((RexInputRef) rex).getIndex(); + } else { + newFilterArg = -1; + } + aggCalls.add(aggregateCall.copy(newArgs.build(), newFilterArg)); + } + + final Aggregate newAggregate = + aggregate.copy(aggregate.getTraitSet(), project.getInput(), + aggregate.indicator, newGroupSet, newGroupingSets, + aggCalls.build()); + + // Add a project if the group set is not in the same order or + // contains duplicates. + RelNode rel = newAggregate; + if (!newKeys.equals(newGroupSet.asList())) { + final List posList = Lists.newArrayList(); + for (int newKey : newKeys) { + posList.add(newGroupSet.indexOf(newKey)); + } + if (aggregate.indicator) { + for (int newKey : newKeys) { + posList.add(aggregate.getGroupCount() + newGroupSet.indexOf(newKey)); + } + } + for (int i = newAggregate.getGroupCount() + + newAggregate.getIndicatorCount(); + i < newAggregate.getRowType().getFieldCount(); i++) { + posList.add(i); + } + rel = RelOptUtil.createProject(HiveProject.DEFAULT_PROJECT_FACTORY, + rel, posList); + } + + return rel; + } +} + +// End AggregateProjectMergeRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f26d1df..cc31801 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -134,6 +134,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; @@ -884,6 +885,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE); hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE); hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY)); + hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE); hepPgm = hepPgmBldr.build(); HepPlanner hepPlanner = new HepPlanner(hepPgm); -- 1.7.12.4 (Apple Git-37)