diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 59198ba..5907590 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -619,6 +619,7 @@
     HIVE_CBO_ENABLED("hive.cbo.enable", false, "Flag to control enabling Cost Based Optimizations using Optiq framework."),
     HIVE_CBO_MAX_JOINS_SUPPORTED("hive.cbo.max.joins.supported", 10, " Control queries that will be considered for join reordering, based on number of joins in them. Beyond a certain number of joins, the cost of considering possible permutations is prohibitive."),
     HIVE_CBO_PULLPROJECTABOVEJOIN_RULE("hive.cbo.project.pullabovejoin.rule", false, ""),
+    HIVE_CBO_GREEDY_JOIN_ORDER("hive.cbo.greedy.join.order", false, ""),
 
     // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
     // need to remove by hive .13. Also, do not change default (see SMB operator)
diff --git a/conf/hive-default.xml.template b/conf/hive-default.xml.template
index 092cb9a..58e46f7 100644
--- a/conf/hive-default.xml.template
+++ b/conf/hive-default.xml.template
@@ -953,6 +953,11 @@
 </property>
 
+<property>
+  <name>hive.cbo.greedy.join.order</name>
+  <value>false</value>
+  <description></description>
+</property>
 
 <property>
   <name>hive.mapjoin.bucket.cache.size</name>
   <value>100</value>
diff --git a/pom.xml b/pom.xml
index 3fd5c10..b5a5697 100644
--- a/pom.xml
+++ b/pom.xml
@@ -199,17 +199,6 @@
       <snapshots>
         <enabled>false</enabled>
       </snapshots>
     </repository>
-    <repository>
-      <id>conjars</id>
-      <name>Optiq Conjars repository</name>
-      <url>http://conjars.org/repo</url>
-      <layout>default</layout>
-      <releases>
-        <enabled>true</enabled>
-        <updatePolicy>always</updatePolicy>
-        <checksumPolicy>warn</checksumPolicy>
-      </releases>
-    </repository>
   </repositories>
 
diff --git a/ql/pom.xml b/ql/pom.xml
index 6574e06..11fc936 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -28,7 +28,7 @@
   <name>Hive Query Language</name>
 
   <properties>
-    <optiq.version>0.7</optiq.version>
+    <optiq.version>0.9.0-incubating-SNAPSHOT</optiq.version>
     <hive.path.to.root>..</hive.path.to.root>
   </properties>
 
@@ -183,7 +183,7 @@
       <version>${datanucleus-core.version}</version>
     </dependency>
     <dependency>
-      <groupId>net.hydromatic</groupId>
+      <groupId>org.apache.optiq</groupId>
       <artifactId>optiq-core</artifactId>
       <version>${optiq.version}</version>
     </dependency>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java
index 822467c..15596bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/cost/HiveVolcanoPlanner.java
@@ -17,7 +17,7 @@
 
   /** Creates a HiveVolcanoPlanner. */
   public HiveVolcanoPlanner() {
-    super(HiveCost.FACTORY);
+    super(HiveCost.FACTORY, null);
   }
 
   public static RelOptPlanner createPlanner() {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java
index a2827d7..6bebcc2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveJoinRel.java
@@ -81,15 +81,15 @@ public void implement(Implementor implementor) {
   }
 
   @Override
   public final HiveJoinRel copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode left,
-      RelNode right, JoinRelType joinType) {
-    return copy(traitSet, conditionExpr, left, right, m_joinAlgorithm, m_mapJoinStreamingSide);
+      RelNode right, JoinRelType joinType, boolean semiJoinDone) {
+    return copy(traitSet, conditionExpr, left, right, m_joinAlgorithm, m_mapJoinStreamingSide, m_leftSemiJoin);
   }
 
   public HiveJoinRel copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode left, RelNode right,
-      JoinAlgorithm joinalgo, MapJoinStreamingRelation streamingSide) {
+      JoinAlgorithm joinalgo, MapJoinStreamingRelation streamingSide, boolean semiJoinDone) {
     try {
       return new HiveJoinRel(getCluster(), traitSet, left, right, conditionExpr, joinType,
-          variablesStopped, joinalgo, streamingSide, this.m_leftSemiJoin);
+          variablesStopped, joinalgo, streamingSide, semiJoinDone);
     } catch (InvalidRelException e) {
       // Semantic error not possible. Must be a bug. Convert to
       // internal error.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java
index afb030c..d136814 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HiveRelFieldTrimmer.java
@@ -547,7 +547,7 @@ public TrimResult trimFields(HiveJoinRel join, BitSet fieldsUsed,
     RexNode newConditionExpr = conditionExpr.accept(shuttle);
 
     final HiveJoinRel newJoin = join.copy(join.getTraitSet(), newConditionExpr,
-        newInputs.get(0), newInputs.get(1), join.getJoinType());
+        newInputs.get(0), newInputs.get(1), join.getJoinType(), false);
 
     return new TrimResult(newJoin, mapping);
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java
index 461ca09..a2b8841 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RelNodeConverter.java
@@ -73,6 +73,7 @@
 import org.eigenbase.rex.RexCall;
 import org.eigenbase.rex.RexInputRef;
 import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
 import org.eigenbase.sql.fun.SqlStdOperatorTable;
 import org.eigenbase.util.CompositeList;
 import org.eigenbase.util.Pair;
@@ -83,11 +84,15 @@
 public class RelNodeConverter {
 
   private static final Map<String, Aggregation> AGG_MAP = ImmutableMap
-      .<String, Aggregation> builder()
-      .put("count", (Aggregation) SqlStdOperatorTable.COUNT)
-      .put("sum", SqlStdOperatorTable.SUM).put("min", SqlStdOperatorTable.MIN)
-      .put("max", SqlStdOperatorTable.MAX).put("avg", SqlStdOperatorTable.AVG)
-      .build();
+      .<String, Aggregation> builder()
+      .put(
+          "count",
+          (Aggregation) SqlStdOperatorTable.COUNT)
+      .put("sum", SqlStdOperatorTable.SUM)
+      .put("min", SqlStdOperatorTable.MIN)
+      .put("max", SqlStdOperatorTable.MAX)
+      .put("avg", SqlStdOperatorTable.AVG)
+      .build();
 
   public static RelNode convert(Operator sinkOp, RelOptCluster cluster,
       RelOptSchema schema, SemanticAnalyzer sA, ParseContext pCtx) {
@@ -228,13 +233,16 @@ void propagatePosMap(RelNode node, RelNode parent) {
       opPositionMap.put(node, opPositionMap.get(parent));
     }
 
-    RexNode convertToOptiqExpr(final ExprNodeDesc expr, final RelNode optiqOP, final boolean flatten) throws SemanticException {
+    RexNode convertToOptiqExpr(final ExprNodeDesc expr, final RelNode optiqOP, final boolean flatten)
+        throws SemanticException {
       return convertToOptiqExpr(expr, optiqOP, 0, flatten);
     }
 
-    RexNode convertToOptiqExpr(final ExprNodeDesc expr, final RelNode optiqOP, int offset, final boolean flatten) throws SemanticException {
+    RexNode convertToOptiqExpr(final ExprNodeDesc expr, final RelNode optiqOP, int offset,
+        final boolean flatten) throws SemanticException {
       ImmutableMap<String, Integer> posMap = opPositionMap.get(optiqOP);
-      RexNodeConverter c = new RexNodeConverter(cluster, optiqOP.getRowType(), posMap, offset, flatten);
+      RexNodeConverter c = new RexNodeConverter(cluster, optiqOP.getRowType(), posMap, offset,
+          flatten);
       return c.convert(expr);
     }
@@ -347,7 +355,8 @@ private HiveJoinRel convertJoinOp(Context ctx, JoinOperator op, JoinCondDesc jc,
         }
       }
 
-      joinRel = HiveJoinRel.getJoin(ctx.cluster, leftRel, rightRel, joinPredicate, joinType, false);
+      joinRel = HiveJoinRel.getJoin(ctx.cluster, leftRel, rightRel, joinPredicate, joinType,
+          false);
     } else {
       throw new RuntimeException("Right & Left of Join Condition columns are not equal");
     }
@@ -405,15 +414,15 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       Context ctx = (Context) procCtx;
       HiveRel input = (HiveRel) ctx.getParentNode((Operator) nd, 0);
       FilterOperator filterOp = (FilterOperator) nd;
-      RexNode convertedFilterExpr = ctx
-          .convertToOptiqExpr(filterOp.getConf().getPredicate(), input, true);
+      RexNode convertedFilterExpr = ctx.convertToOptiqExpr(filterOp.getConf().getPredicate(),
+          input, true);
 
       // Flatten the condition otherwise Optiq chokes on assertion
       // (FilterRelBase)
       if (convertedFilterExpr instanceof RexCall) {
         RexCall call = (RexCall) convertedFilterExpr;
-        convertedFilterExpr = ctx.cluster.getRexBuilder().makeFlatCall(call.getOperator(),
-            call.getOperands());
+        convertedFilterExpr = ctx.cluster.getRexBuilder().makeCall(call.getType(),
+            call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
       }
 
       HiveRel filtRel = new HiveFilterRel(ctx.cluster, ctx.cluster.traitSetOf(HiveRel.CONVENTION),
@@ -553,7 +562,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       /*
       * numReducers == 1 and order.length = 1 => a RS for CrossJoin.
       */
-      if ( order.length() == 0 ) {
+      if (order.length() == 0) {
         Operator op = (Operator) nd;
         ctx.hiveOpToRelNode.put(op, input);
         return input;
@@ -609,13 +618,12 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       TableScanOperator tableScanOp = (TableScanOperator) nd;
       RowResolver rr = ctx.sA.getRowResolver(tableScanOp);
 
-      List<String> neededCols = new ArrayList<String>(
-          tableScanOp.getNeededColumns());
+      List<String> neededCols = new ArrayList<String>(tableScanOp.getNeededColumns());
       Statistics stats = tableScanOp.getStatistics();
 
       try {
-        stats = addPartitionColumns(ctx, tableScanOp, tableScanOp.getConf()
-            .getAlias(), ctx.sA.getTable(tableScanOp), stats, neededCols);
+        stats = addPartitionColumns(ctx, tableScanOp, tableScanOp.getConf().getAlias(),
+            ctx.sA.getTable(tableScanOp), stats, neededCols);
       } catch (CloneNotSupportedException ce) {
         throw new SemanticException(ce);
       }
@@ -637,9 +645,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     /*
      * Add partition columns to needed columns and fake the COlStats for it.
      */
-    private Statistics addPartitionColumns(Context ctx,
-        TableScanOperator tableScanOp, String tblAlias, Table tbl,
-        Statistics stats, List<String> neededCols)
+    private Statistics addPartitionColumns(Context ctx, TableScanOperator tableScanOp,
+        String tblAlias, Table tbl, Statistics stats, List<String> neededCols)
         throws CloneNotSupportedException {
       if (!tbl.isPartitioned()) {
         return stats;
@@ -648,11 +655,9 @@ private Statistics addPartitionColumns(Context ctx,
       List<FieldSchema> pCols = tbl.getPartCols();
       for (FieldSchema pC : pCols) {
         neededCols.add(pC.getName());
-        ColStatistics cStats = stats.getColumnStatisticsForColumn(tblAlias,
-            pC.getName());
+        ColStatistics cStats = stats.getColumnStatisticsForColumn(tblAlias, pC.getName());
         if (cStats == null) {
-          PrunedPartitionList partList = ctx.parseCtx.getOpToPartList().get(
-              tableScanOp);
+          PrunedPartitionList partList = ctx.parseCtx.getOpToPartList().get(tableScanOp);
           cStats = new ColStatistics(tblAlias, pC.getName(), pC.getType());
           cStats.setCountDistint(partList.getPartitions().size());
           pStats.add(cStats);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java
index 507095b..24ced85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/RexNodeConverter.java
@@ -37,6 +37,7 @@
 import org.eigenbase.rex.RexBuilder;
 import org.eigenbase.rex.RexCall;
 import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
 import org.eigenbase.sql.SqlOperator;
 import org.eigenbase.sql.fun.SqlCastFunction;
 import org.eigenbase.sql.type.SqlTypeName;
@@ -126,14 +127,17 @@ private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticExcep
 
     // This is an explicit cast
     RexNode expr = null;
+    RelDataType retType = null;
     expr = handleExplicitCast(func, childRexNodeLst);
 
     if (expr == null) {
-      RelDataType retType = (expr != null) ? expr.getType() : TypeConverter.convert(
-          func.getTypeInfo(), m_cluster.getTypeFactory());
+      retType = (expr != null) ? expr.getType() : TypeConverter.convert(func.getTypeInfo(),
+          m_cluster.getTypeFactory());
       SqlOperator optiqOp = SqlFunctionConverter.getOptiqOperator(func.getGenericUDF(),
           argTypeBldr.build(), retType);
       expr = m_cluster.getRexBuilder().makeCall(optiqOp, childRexNodeLst);
+    } else {
+      retType = expr.getType();
     }
 
     // TODO: Cast Function in Optiq have a bug where it infertype on cast throws
@@ -141,7 +145,8 @@ private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticExcep
     if (m_flattenExpr && (expr instanceof RexCall)
         && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
       RexCall call = (RexCall) expr;
-      expr = m_cluster.getRexBuilder().makeFlatCall(call.getOperator(), call.getOperands());
+      expr = m_cluster.getRexBuilder().makeCall(retType, call.getOperator(),
+          RexUtil.flatten(call.getOperands(), call.getOperator()));
     }
 
     return expr;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b5ccf80..7ecada3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -223,7 +223,6 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.mapred.InputFormat;
-
 import org.eigenbase.rel.AggregateCall;
 import org.eigenbase.rel.Aggregation;
 import org.eigenbase.rel.InvalidRelException;
@@ -235,13 +234,17 @@
 import org.eigenbase.rel.metadata.CachingRelMetadataProvider;
 import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
 import org.eigenbase.rel.metadata.RelMetadataProvider;
+import org.eigenbase.rel.rules.ConvertMultiJoinRule;
+import org.eigenbase.rel.rules.LoptOptimizeJoinRule;
 import org.eigenbase.relopt.RelOptCluster;
 import org.eigenbase.relopt.RelOptPlanner;
 import org.eigenbase.relopt.RelOptQuery;
 import org.eigenbase.relopt.RelOptRule;
 import org.eigenbase.relopt.RelOptSchema;
 import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.relopt.hep.HepMatchOrder;
 import org.eigenbase.relopt.hep.HepPlanner;
+import org.eigenbase.relopt.hep.HepProgram;
 import org.eigenbase.relopt.hep.HepProgramBuilder;
 import org.eigenbase.reltype.RelDataType;
 import org.eigenbase.reltype.RelDataTypeFactory;
@@ -254,6 +257,7 @@
 import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 
 /**
@@ -11801,13 +11805,14 @@ private ASTNode getOptimizedAST() throws SemanticException {
     }
 
     @Override
-    public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema,
-        SchemaPlus rootSchema) {
-      RelOptPlanner planner = HiveVolcanoPlanner.createPlanner();
-
+    public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) {
+      RelNode optiqGenPlan = null;
+      RelNode optiqPreCboPlan = null;
+      RelNode optiqOptimizedPlan = null;
       /*
        * recreate cluster, so that it picks up the additional traitDef
        */
+      RelOptPlanner planner = HiveVolcanoPlanner.createPlanner();
       final RelOptQuery query = new RelOptQuery(planner);
       final RexBuilder rexBuilder = cluster.getRexBuilder();
       cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder);
@@ -11816,46 +11821,59 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema,
       m_relOptSchema = relOptSchema;
       m_rootSchema = rootSchema;
 
-      RelNode optiqPlan = null;
       try {
-        optiqPlan = genLogicalPlan(qb);
+        optiqGenPlan = genLogicalPlan(qb);
       } catch (SemanticException e) {
         m_semanticException = e;
         throw new RuntimeException(e);
       }
 
-      optiqPlan = applyPreCBOTransforms(optiqPlan,
-          HiveDefaultRelMetadataProvider.INSTANCE);
-
+      optiqPreCboPlan = applyPreCBOTransforms(optiqGenPlan, HiveDefaultRelMetadataProvider.INSTANCE);
       List<RelMetadataProvider> list = Lists.newArrayList();
       list.add(HiveDefaultRelMetadataProvider.INSTANCE);
-      planner.registerMetadataProviders(list);
 
-      RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
-      cluster.setMetadataProvider(new CachingRelMetadataProvider(
-          chainedProvider, planner));
+      if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_GREEDY_JOIN_ORDER)) {
+        planner.registerMetadataProviders(list);
 
-      planner.addRule(HiveSwapJoinRule.INSTANCE);
-      planner.addRule(HivePushJoinThroughJoinRule.LEFT);
-      planner.addRule(HivePushJoinThroughJoinRule.RIGHT);
-      if (HiveConf.getBoolVar(conf,
-          HiveConf.ConfVars.HIVE_CBO_PULLPROJECTABOVEJOIN_RULE)) {
-        planner.addRule(HivePullUpProjectsAboveJoinRule.BOTH_PROJECT);
-        planner.addRule(HivePullUpProjectsAboveJoinRule.LEFT_PROJECT);
-        planner.addRule(HivePullUpProjectsAboveJoinRule.RIGHT_PROJECT);
-        planner.addRule(HiveMergeProjectRule.INSTANCE);
-      }
+        RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+        cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, planner));
+
+        planner.addRule(HiveSwapJoinRule.INSTANCE);
+        planner.addRule(HivePushJoinThroughJoinRule.LEFT);
+        planner.addRule(HivePushJoinThroughJoinRule.RIGHT);
+        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_PULLPROJECTABOVEJOIN_RULE)) {
+          planner.addRule(HivePullUpProjectsAboveJoinRule.BOTH_PROJECT);
+          planner.addRule(HivePullUpProjectsAboveJoinRule.LEFT_PROJECT);
+          planner.addRule(HivePullUpProjectsAboveJoinRule.RIGHT_PROJECT);
+          planner.addRule(HiveMergeProjectRule.INSTANCE);
+        }
 
-      RelTraitSet desiredTraits = cluster.traitSetOf(HiveRel.CONVENTION,
-          RelCollationImpl.EMPTY);
+        RelTraitSet desiredTraits = cluster
+            .traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
 
-      RelNode rootRel = optiqPlan;
-      if (!optiqPlan.getTraitSet().equals(desiredTraits)) {
-        rootRel = planner.changeTraits(optiqPlan, desiredTraits);
+        RelNode rootRel = optiqPreCboPlan;
+        if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) {
+          rootRel = planner.changeTraits(optiqPreCboPlan, desiredTraits);
+        }
+        planner.setRoot(rootRel);
+
+        optiqOptimizedPlan = planner.findBestExp();
+      } else {
+        final HepProgram hepPgm = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP)
+            .addRuleInstance(new ConvertMultiJoinRule(HiveJoinRel.class))
+            .addRuleInstance(LoptOptimizeJoinRule.INSTANCE).build();
+
+        HepPlanner hepPlanner = new HepPlanner(hepPgm);
+
+        hepPlanner.registerMetadataProviders(list);
+        RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+        cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
+
+        hepPlanner.setRoot(optiqPreCboPlan);
+        optiqOptimizedPlan = hepPlanner.findBestExp();
       }
-      planner.setRoot(rootRel);
 
-      return planner.findBestExp();
+      return optiqOptimizedPlan;
     }
 
     public RelNode applyPreCBOTransforms(RelNode basePlan,