diff --git pom.xml pom.xml
index 6a96391..fc9b546 100644
--- pom.xml
+++ pom.xml
@@ -104,7 +104,7 @@
     <antlr.version>3.4</antlr.version>
     <avro.version>1.7.7</avro.version>
     <bonecp.version>0.8.0.RELEASE</bonecp.version>
-    <calcite.version>1.4.0-incubating</calcite.version>
+    <calcite.version>1.5.0-incubating-SNAPSHOT</calcite.version>
     <datanucleus-api-jdo.version>3.2.6</datanucleus-api-jdo.version>
     <datanucleus-core.version>3.2.10</datanucleus-core.version>
     <datanucleus-rdbms.version>3.2.9</datanucleus-rdbms.version>
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
index 1831d69..446dc73 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
@@ -29,13 +29,13 @@
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelWriter;
-import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -152,7 +152,7 @@ public double getRows() {
 
   @Override
   public RelNode project(ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields,
-      RelFactories.ProjectFactory projectFactory) {
+      RelBuilder relBuilder) {
 
     // 1. If the schema is the same then bail out
     final int fieldCount = getRowType().getFieldCount();
@@ -183,7 +183,7 @@ public RelNode project(ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFi
         fieldNames));
 
     // 5. Add Proj on top of TS
-    return projectFactory.createProject(newHT, exprList, new ArrayList<String>(fieldNames));
+    return relBuilder.push(newHT).project(exprList, new ArrayList<String>(fieldNames)).build();
   }
 
   public List<Integer> getNeededColIndxsFrmReloptHT() {
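NOTE: In Calcite 1.5 the TableScan.project() hook receives a RelBuilder instead of a
RelFactories.ProjectFactory, so the single-shot createProject(input, exprs, names) call
becomes a push/project/build chain. A minimal sketch of the pattern; the class and
method names below are illustrative, not patch code:

    import java.util.List;

    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.rex.RexNode;
    import org.apache.calcite.tools.RelBuilder;

    // Equivalent of projectFactory.createProject(input, exprs, names),
    // expressed with the RelBuilder stack API.
    class RelBuilderProjectSketch {
      static RelNode projectOnTop(RelBuilder relBuilder, RelNode input,
          List<RexNode> exprs, List<String> names) {
        return relBuilder
            .push(input)            // make `input` the builder's current node
            .project(exprs, names)  // wrap it in a projection of `exprs` named `names`
            .build();               // pop the finished RelNode
      }
    }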
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index a12fa2a..633187b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -23,10 +23,9 @@
 import java.util.List;
 import java.util.Set;
 
+import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.Aggregate;
-import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
@@ -35,6 +34,7 @@
 import org.apache.calcite.rex.RexVisitor;
 import org.apache.calcite.sql.validate.SqlValidator;
 import org.apache.calcite.sql2rel.RelFieldTrimmer;
+import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.mapping.IntPair;
 import org.apache.calcite.util.mapping.Mapping;
@@ -42,15 +42,10 @@ import org.apache.calcite.util.mapping.Mappings;
 
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
 
-import com.google.common.base.Function;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-
 public class HiveRelFieldTrimmer extends RelFieldTrimmer {
 
-  private final RelFactories.AggregateFactory aggregateFactory;
-
   public HiveRelFieldTrimmer(SqlValidator validator,
+      RelOptCluster cluster,
       RelFactories.ProjectFactory projectFactory,
       RelFactories.FilterFactory filterFactory,
       RelFactories.JoinFactory joinFactory,
@@ -58,9 +53,10 @@ public HiveRelFieldTrimmer(SqlValidator validator,
       RelFactories.SortFactory sortFactory,
       RelFactories.AggregateFactory aggregateFactory,
       RelFactories.SetOpFactory setOpFactory) {
-    super(validator, projectFactory, filterFactory, joinFactory,
-        semiJoinFactory, sortFactory, aggregateFactory, setOpFactory);
-    this.aggregateFactory = aggregateFactory;
+    super(validator,
+        RelBuilder.proto(projectFactory, filterFactory, joinFactory,
+            semiJoinFactory, sortFactory, aggregateFactory, setOpFactory)
+            .create(cluster, null));
   }
 
   /**
@@ -155,127 +151,5 @@
 
     return new TrimResult(newJoin, mapping);
   }
 
-  /**
-   * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
-   * {@link org.apache.calcite.rel.logical.LogicalAggregate}.
-   */
-  @Override
-  public TrimResult trimFields(
-      Aggregate aggregate,
-      ImmutableBitSet fieldsUsed,
-      Set<RelDataTypeField> extraFields) {
-    // Fields:
-    //
-    // | sys fields | group fields | indicator fields | agg functions |
-    //
-    // Two kinds of trimming:
-    //
-    // 1. If agg rel has system fields but none of these are used, create an
-    // agg rel with no system fields.
-    //
-    // 2. If aggregate functions are not used, remove them.
-    //
-    // But group and indicator fields stay, even if they are not used.
-
-    final RelDataType rowType = aggregate.getRowType();
-
-    // Compute which input fields are used.
-    // 1. group fields are always used
-    final ImmutableBitSet.Builder inputFieldsUsed =
-        ImmutableBitSet.builder(aggregate.getGroupSet());
-    // 2. agg functions
-    for (AggregateCall aggCall : aggregate.getAggCallList()) {
-      for (int i : aggCall.getArgList()) {
-        inputFieldsUsed.set(i);
-      }
-      if (aggCall.filterArg >= 0) {
-        inputFieldsUsed.set(aggCall.filterArg);
-      }
-    }
-
-    // Create input with trimmed columns.
-    final RelNode input = aggregate.getInput();
-    final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
-    final TrimResult trimResult =
-        trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields);
-    final RelNode newInput = trimResult.left;
-    final Mapping inputMapping = trimResult.right;
-
-    // We have to return group keys and (if present) indicators.
-    // So, pretend that the consumer asked for them.
-    final int groupCount = aggregate.getGroupSet().cardinality();
-    final int indicatorCount = aggregate.getIndicatorCount();
-    fieldsUsed =
-        fieldsUsed.union(ImmutableBitSet.range(groupCount + indicatorCount));
-
-    // If the input is unchanged, and we need to project all columns,
-    // there's nothing to do.
-    if (input == newInput
-        && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) {
-      return new TrimResult(
-          aggregate,
-          Mappings.createIdentity(rowType.getFieldCount()));
-    }
-
-    // Which agg calls are used by our consumer?
-    int j = groupCount + indicatorCount;
-    int usedAggCallCount = 0;
-    for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
-      if (fieldsUsed.get(j++)) {
-        ++usedAggCallCount;
-      }
-    }
-
-    // Offset due to the number of system fields having changed.
-    Mapping mapping =
-        Mappings.create(
-            MappingType.INVERSE_SURJECTION,
-            rowType.getFieldCount(),
-            groupCount + indicatorCount + usedAggCallCount);
-
-    final ImmutableBitSet newGroupSet =
-        Mappings.apply(inputMapping, aggregate.getGroupSet());
-
-    final ImmutableList<ImmutableBitSet> newGroupSets =
-        ImmutableList.copyOf(
-            Iterables.transform(aggregate.getGroupSets(),
-                new Function<ImmutableBitSet, ImmutableBitSet>() {
-                  @Override
-                  public ImmutableBitSet apply(ImmutableBitSet input) {
-                    return Mappings.apply(inputMapping, input);
-                  }
-                }));
-
-    // Populate mapping of where to find the fields. System, group key and
-    // indicator fields first.
-    for (j = 0; j < groupCount + indicatorCount; j++) {
-      mapping.set(j, j);
-    }
-
-    // Now create new agg calls, and populate mapping for them.
-    final List<AggregateCall> newAggCallList = new ArrayList<>();
-    j = groupCount + indicatorCount;
-    for (AggregateCall aggCall : aggregate.getAggCallList()) {
-      if (fieldsUsed.get(j)) {
-        AggregateCall newAggCall =
-            aggCall.copy(Mappings.apply2(inputMapping, aggCall.getArgList()),
-                Mappings.apply(inputMapping, aggCall.filterArg));
-        if (newAggCall.equals(aggCall)) {
-          newAggCall = aggCall; // immutable -> canonize to save space
-        }
-        mapping.set(j, groupCount + indicatorCount + newAggCallList.size());
-        newAggCallList.add(newAggCall);
-      }
-      ++j;
-    }
-
-    RelNode newAggregate = aggregateFactory.createAggregate(newInput,
-        aggregate.indicator, newGroupSet, newGroupSets, newAggCallList);
-
-    assert newAggregate.getClass() == aggregate.getClass();
-
-    return new TrimResult(newAggregate, mapping);
-  }
-
 }
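NOTE: Instead of forwarding seven operator factories to the RelFieldTrimmer
super-constructor one by one, the trimmer now bundles them via RelBuilder.proto(...) and
hands the base class a cluster-bound RelBuilder, which is why the constructor gains the
RelOptCluster parameter. That also makes the copied trimFields(Aggregate, ...) override
removable: it existed only so the rewritten Aggregate would be created through Hive's
AggregateFactory, and the Calcite 1.5 base class now builds operators through the
injected RelBuilder, so Hive operators come out of the base implementation directly.
A sketch of the proto pattern, with Calcite's default factories standing in for the
Hive-specific ones passed in the patch:

    import org.apache.calcite.plan.RelOptCluster;
    import org.apache.calcite.rel.core.RelFactories;
    import org.apache.calcite.tools.RelBuilder;

    // proto(...) packs the factories into a builder prototype; create() binds it
    // to a cluster. The RelOptSchema argument may be null because a trimmer only
    // rewrites existing nodes and never resolves table names.
    class RelBuilderProtoSketch {
      static RelBuilder builderFor(RelOptCluster cluster) {
        return RelBuilder.proto(
                RelFactories.DEFAULT_PROJECT_FACTORY,
                RelFactories.DEFAULT_FILTER_FACTORY,
                RelFactories.DEFAULT_AGGREGATE_FACTORY)
            .create(cluster, null);
      }
    }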
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 14946b3..ff2efc0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -77,8 +77,7 @@
   private Aggregate groupBy;
   private Filter having;
   private Project select;
-  private Sort order;
-  private Sort limit;
+  private Sort orderLimit;
 
   private Schema schema;
 
@@ -203,27 +202,14 @@ else if (aggregateType == Group.CUBE) {
      * parent hence we need to go top down; but OB at each block really belong
      * to its src/from. Hence the need to pass in sort for each block from
      * its parent.
+     * 8. Limit
      */
-    convertOBToASTNode((HiveSortLimit) order);
-
-    // 8. Limit
-    convertLimitToASTNode((HiveSortLimit) limit);
+    convertOrderLimitToASTNode((HiveSortLimit) orderLimit);
 
     return hiveAST.getAST();
   }
 
-  private void convertLimitToASTNode(HiveSortLimit limit) {
-    if (limit != null) {
-      HiveSortLimit hiveLimit = limit;
-      RexNode limitExpr = hiveLimit.getFetchExpr();
-      if (limitExpr != null) {
-        Object val = ((RexLiteral) limitExpr).getValue2();
-        hiveAST.limit = ASTBuilder.limit(val);
-      }
-    }
-  }
-
-  private void convertOBToASTNode(HiveSortLimit order) {
+  private void convertOrderLimitToASTNode(HiveSortLimit order) {
     if (order != null) {
       HiveSortLimit hiveSortLimit = order;
       if (!hiveSortLimit.getCollation().getFieldCollations().isEmpty()) {
@@ -264,6 +250,12 @@
         }
         hiveAST.order = orderAst;
       }
+
+      RexNode limitExpr = hiveSortLimit.getFetchExpr();
+      if (limitExpr != null) {
+        Object val = ((RexLiteral) limitExpr).getValue2();
+        hiveAST.limit = ASTBuilder.limit(val);
+      }
     }
   }
 
@@ -366,11 +358,7 @@ public void visit(RelNode node, int ordinal, RelNode parent) {
       if (ASTConverter.this.select != null) {
         ASTConverter.this.from = node;
       } else {
-        Sort hiveSortRel = (Sort) node;
-        if (hiveSortRel.getCollation().getFieldCollations().isEmpty())
-          ASTConverter.this.limit = hiveSortRel;
-        else
-          ASTConverter.this.order = hiveSortRel;
+        ASTConverter.this.orderLimit = (Sort) node;
       }
     }
     /*
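NOTE: HiveSortLimit is a Calcite Sort, which models ORDER BY (the collation) and LIMIT
(the fetch expression) on a single operator, so the separate order/limit fields and the
two conversion methods collapse into one orderLimit field and one
convertOrderLimitToASTNode method. A sketch of how both pieces hang off one Sort node
(illustrative only; the patch reads the fetch through HiveSortLimit.getFetchExpr()):

    import org.apache.calcite.rel.RelCollation;
    import org.apache.calcite.rel.core.Sort;
    import org.apache.calcite.rex.RexLiteral;
    import org.apache.calcite.rex.RexNode;

    // One Sort carries both clauses; either may be absent.
    class SortLimitSketch {
      static void describe(Sort sort) {
        RelCollation collation = sort.getCollation();   // ORDER BY keys, may be empty
        if (!collation.getFieldCollations().isEmpty()) {
          System.out.println("ORDER BY " + collation.getFieldCollations());
        }
        RexNode fetch = sort.fetch;                     // LIMIT expression, or null
        if (fetch != null) {
          System.out.println("LIMIT " + ((RexLiteral) fetch).getValue2());
        }
      }
    }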
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e13356c..de67b54 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -58,8 +58,6 @@
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
 import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
@@ -902,7 +900,8 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
           HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
       // The previous rules can pull up projections through join operators,
       // thus we run the field trimmer again to push them back down
-      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY,
+      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+          cluster, HiveProject.DEFAULT_PROJECT_FACTORY,
           HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, HiveSemiJoin.HIVE_SEMIJOIN_FACTORY,
           HiveSortLimit.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
 
@@ -983,7 +982,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
           new HivePartitionPruneRule(conf));
 
       // 5. Projection Pruning
-      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY,
+      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+          cluster, HiveProject.DEFAULT_PROJECT_FACTORY,
           HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, HiveSemiJoin.HIVE_SEMIJOIN_FACTORY,
           HiveSortLimit.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
 
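NOTE: Both HiveRelFieldTrimmer call sites change identically: the cluster is threaded
through as the new second constructor argument and the factory list is untouched. For
context, the call shape after the patch looks like the sketch below; the trim(basePlan)
line is assumed from the surrounding code rather than shown in these hunks.

    HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
        cluster, HiveProject.DEFAULT_PROJECT_FACTORY,
        HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
        HiveSemiJoin.HIVE_SEMIJOIN_FACTORY, HiveSortLimit.HIVE_SORT_REL_FACTORY,
        HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
    RelNode trimmedPlan = fieldTrimmer.trim(basePlan);  // apply column pruning to the plan root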