diff --git pom.xml pom.xml index 15c2805..1d6887e 100644 --- pom.xml +++ pom.xml @@ -103,7 +103,7 @@ 3.4 1.7.7 0.8.0.RELEASE - 1.3.0-incubating + 1.4.0-incubating 3.2.6 3.2.10 3.2.9 @@ -180,6 +180,19 @@ + calcite + calcite maven repository + https://repository.apache.org/content/repositories/orgapachecalcite-1009 + default + + true + warn + + + false + + + datanucleus datanucleus maven repository http://www.datanucleus.org/downloads/maven2 diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index 5a5954d..0e282b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -278,29 +278,6 @@ private static SqlKind reverse(SqlKind kind) { } } - private static SqlOperator op(SqlKind kind, SqlOperator operator) { - switch (kind) { - case EQUALS: - return SqlStdOperatorTable.EQUALS; - case NOT_EQUALS: - return SqlStdOperatorTable.NOT_EQUALS; - case GREATER_THAN: - return SqlStdOperatorTable.GREATER_THAN; - case GREATER_THAN_OR_EQUAL: - return SqlStdOperatorTable.GREATER_THAN_OR_EQUAL; - case LESS_THAN: - return SqlStdOperatorTable.LESS_THAN; - case LESS_THAN_OR_EQUAL: - return SqlStdOperatorTable.LESS_THAN_OR_EQUAL; - case IS_DISTINCT_FROM: - return SqlStdOperatorTable.IS_DISTINCT_FROM; - case IS_NOT_DISTINCT_FROM: - return SqlStdOperatorTable.IS_NOT_DISTINCT_FROM; - default: - return operator; - } - } - private static void addJoinKey( List joinKeyList, RexNode key, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java index 18d2838..1df6542 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java @@ -22,6 +22,7 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.Sort; @@ -49,6 +50,24 @@ public HiveSort(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, offset, fetch); } + /** + * Creates a HiveSort. + * + * @param input Input relational expression + * @param collation array of sort specifications + * @param offset Expression for number of rows to discard before returning + * first row + * @param fetch Expression for number of rows to fetch + */ + public static HiveSort create(RelNode input, RelCollation collation, + RexNode offset, RexNode fetch) { + RelOptCluster cluster = input.getCluster(); + collation = RelCollationTraitDef.INSTANCE.canonize(collation); + RelTraitSet traitSet = + TraitsUtil.getSortTraitSet(cluster, input.getTraitSet(), collation); + return new HiveSort(cluster, traitSet, input, collation, offset, fetch); + } + @Override public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation, RexNode offset, RexNode fetch) { @@ -77,9 +96,15 @@ public void implement(Implementor implementor) { private static class HiveSortRelFactory implements RelFactories.SortFactory { @Override - public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation, + public RelNode createSort(RelTraitSet traits, RelNode input, RelCollation collation, RexNode offset, RexNode fetch) { - return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch); + return createSort(input, collation, offset, fetch); + } + + @Override + public RelNode createSort(RelNode input, RelCollation collation, RexNode offset, + RexNode fetch) { + return create(input, collation, offset, fetch); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java index fd8f5cb..ac72ee5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java @@ -49,14 +49,14 @@ operand(HiveProject.class, any()), operand(HiveProject.class, any())), "JoinProjectTransposeRule(Project-Project)", - HiveProject.DEFAULT_PROJECT_FACTORY); + false, HiveProject.DEFAULT_PROJECT_FACTORY); public static final HiveJoinProjectTransposeRule LEFT_PROJECT = new HiveJoinProjectTransposeRule( operand(HiveJoin.class, some(operand(HiveProject.class, any()))), "JoinProjectTransposeRule(Project-Other)", - HiveProject.DEFAULT_PROJECT_FACTORY); + false, HiveProject.DEFAULT_PROJECT_FACTORY); public static final HiveJoinProjectTransposeRule RIGHT_PROJECT = new HiveJoinProjectTransposeRule( @@ -65,219 +65,37 @@ operand(RelNode.class, any()), operand(HiveProject.class, any())), "JoinProjectTransposeRule(Other-Project)", - HiveProject.DEFAULT_PROJECT_FACTORY); + false, HiveProject.DEFAULT_PROJECT_FACTORY); - private final ProjectFactory projectFactory; - - - private HiveJoinProjectTransposeRule( - RelOptRuleOperand operand, - String description, ProjectFactory pFactory) { - super(operand, description, pFactory); - this.projectFactory = pFactory; - } - - @Override - public void onMatch(RelOptRuleCall call) { - Join joinRel = call.rel(0); - JoinRelType joinType = joinRel.getJoinType(); - - Project leftProj; - Project rightProj; - RelNode leftJoinChild; - RelNode rightJoinChild; - - // see if at least one input's projection doesn't generate nulls - if (hasLeftChild(call)) { - leftProj = call.rel(1); - leftJoinChild = getProjectChild(call, leftProj, true); - } else { - leftProj = null; - leftJoinChild = call.rel(1); - } - if (hasRightChild(call)) { - rightProj = getRightChild(call); - rightJoinChild = getProjectChild(call, rightProj, false); - } else { - rightProj = null; - rightJoinChild = joinRel.getRight(); - } - if ((leftProj == null) && (rightProj == null)) { - return; - } - - // Construct two RexPrograms and combine them. The bottom program - // is a join of the projection expressions from the left and/or - // right projects that feed into the join. The top program contains - // the join condition. - - // Create a row type representing a concatenation of the inputs - // underneath the projects that feed into the join. This is the input - // into the bottom RexProgram. Note that the join type is an inner - // join because the inputs haven't actually been joined yet. - RelDataType joinChildrenRowType = - Join.deriveJoinRowType( - leftJoinChild.getRowType(), - rightJoinChild.getRowType(), - JoinRelType.INNER, - joinRel.getCluster().getTypeFactory(), - null, - Collections.emptyList()); - - // Create projection expressions, combining the projection expressions - // from the projects that feed into the join. For the RHS projection - // expressions, shift them to the right by the number of fields on - // the LHS. If the join input was not a projection, simply create - // references to the inputs. - int nProjExprs = joinRel.getRowType().getFieldCount(); - List> projects = - new ArrayList>(); - RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); - - createProjectExprs( - leftProj, - leftJoinChild, - 0, - rexBuilder, - joinChildrenRowType.getFieldList(), - projects); - - List leftFields = - leftJoinChild.getRowType().getFieldList(); - int nFieldsLeft = leftFields.size(); - createProjectExprs( - rightProj, - rightJoinChild, - nFieldsLeft, - rexBuilder, - joinChildrenRowType.getFieldList(), - projects); - - List projTypes = new ArrayList(); - for (int i = 0; i < nProjExprs; i++) { - projTypes.add(projects.get(i).left.getType()); - } - RelDataType projRowType = - rexBuilder.getTypeFactory().createStructType( - projTypes, - Pair.right(projects)); - - // create the RexPrograms and merge them - RexProgram bottomProgram = - RexProgram.create( - joinChildrenRowType, - Pair.left(projects), - null, - projRowType, - rexBuilder); - RexProgramBuilder topProgramBuilder = - new RexProgramBuilder( - projRowType, - rexBuilder); - topProgramBuilder.addIdentity(); - topProgramBuilder.addCondition(joinRel.getCondition()); - RexProgram topProgram = topProgramBuilder.getProgram(); - RexProgram mergedProgram = - RexProgramBuilder.mergePrograms( - topProgram, - bottomProgram, - rexBuilder); + public static final HiveJoinProjectTransposeRule BOTH_PROJECT_INCLUDE_OUTER = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + operand(HiveProject.class, any()), + operand(HiveProject.class, any())), + "Join(IncludingOuter)ProjectTransposeRule(Project-Project)", + true, HiveProject.DEFAULT_PROJECT_FACTORY); - // expand out the join condition and construct a new LogicalJoin that - // directly references the join children without the intervening - // ProjectRels - RexNode newCondition = - mergedProgram.expandLocalRef( - mergedProgram.getCondition()); - Join newJoinRel = - joinRel.copy(joinRel.getTraitSet(), newCondition, - leftJoinChild, rightJoinChild, joinRel.getJoinType(), - joinRel.isSemiJoinDone()); + public static final HiveJoinProjectTransposeRule LEFT_PROJECT_INCLUDE_OUTER = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + some(operand(HiveProject.class, any()))), + "Join(IncludingOuter)ProjectTransposeRule(Project-Other)", + true, HiveProject.DEFAULT_PROJECT_FACTORY); - // expand out the new projection expressions; if the join is an - // outer join, modify the expressions to reference the join output - List newProjExprs = new ArrayList(); - List projList = mergedProgram.getProjectList(); - List newJoinFields = - newJoinRel.getRowType().getFieldList(); - int nJoinFields = newJoinFields.size(); - int[] adjustments = new int[nJoinFields]; - for (int i = 0; i < nProjExprs; i++) { - RexNode newExpr = mergedProgram.expandLocalRef(projList.get(i)); - if (joinType != JoinRelType.INNER) { - newExpr = - newExpr.accept( - new RelOptUtil.RexInputConverter( - rexBuilder, - joinChildrenRowType.getFieldList(), - newJoinFields, - adjustments)); - } - newProjExprs.add(newExpr); - } + public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_INCLUDE_OUTER = + new HiveJoinProjectTransposeRule( + operand( + HiveJoin.class, + operand(RelNode.class, any()), + operand(HiveProject.class, any())), + "Join(IncludingOuter)ProjectTransposeRule(Other-Project)", + true, HiveProject.DEFAULT_PROJECT_FACTORY); - // finally, create the projection on top of the join - RelNode newProjRel = projectFactory.createProject(newJoinRel, newProjExprs, - joinRel.getRowType().getFieldNames()); - call.transformTo(newProjRel); + private HiveJoinProjectTransposeRule( + RelOptRuleOperand operand, String description, + boolean includeOuter, ProjectFactory projectFactory) { + super(operand, description, includeOuter, projectFactory); } - /** - * Creates projection expressions corresponding to one of the inputs into - * the join - * - * @param projRel the projection input into the join (if it exists) - * @param joinChild the child of the projection input (if there is a - * projection); otherwise, this is the join input - * @param adjustmentAmount the amount the expressions need to be shifted by - * @param rexBuilder rex builder - * @param joinChildrenFields concatenation of the fields from the left and - * right join inputs (once the projections have been - * removed) - * @param projects Projection expressions & names to be created - */ - private void createProjectExprs( - Project projRel, - RelNode joinChild, - int adjustmentAmount, - RexBuilder rexBuilder, - List joinChildrenFields, - List> projects) { - List childFields = - joinChild.getRowType().getFieldList(); - if (projRel != null) { - List> namedProjects = - projRel.getNamedProjects(); - int nChildFields = childFields.size(); - int[] adjustments = new int[nChildFields]; - for (int i = 0; i < nChildFields; i++) { - adjustments[i] = adjustmentAmount; - } - for (Pair pair : namedProjects) { - RexNode e = pair.left; - if (adjustmentAmount != 0) { - // shift the references by the adjustment amount - e = e.accept( - new RelOptUtil.RexInputConverter( - rexBuilder, - childFields, - joinChildrenFields, - adjustments)); - } - projects.add(Pair.of(e, pair.right)); - } - } else { - // no projection; just create references to the inputs - for (int i = 0; i < childFields.size(); i++) { - final RelDataTypeField field = childFields.get(i); - projects.add( - Pair.of( - (RexNode) rexBuilder.makeInputRef( - field.getType(), - i + adjustmentAmount), - field.getName())); - } - } - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f26d1df..73ae7c4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -914,8 +914,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { // 6.1. Merge join into multijoin operators (if possible) calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT, - HiveJoinProjectTransposeRule.LEFT_PROJECT, HiveJoinProjectTransposeRule.RIGHT_PROJECT, + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER, + HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER, HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE); // The previous rules can pull up projections through join operators, // thus we run the field trimmer again to push them back down