diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 6c7dc58..4bfc26f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -44,6 +44,7 @@
 import org.apache.hadoop.hive.ql.plan.OpTraits;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
+
 /**
  * SparkMapJoinOptimizer cloned from ConvertJoinMapJoin is an optimization that replaces a common join
  * (aka shuffle join) with a map join (aka broadcast or fragment replicate
@@ -89,6 +90,7 @@
     LOG.info("Estimated number of buckets " + numBuckets);
     int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets);
+    /* TODO: handle this later
     if (mapJoinConversionPos < 0) {
       // we cannot convert to bucket map join, we cannot convert to
@@ -153,14 +155,15 @@
       mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
-      /*
       if (mapJoinConversionPos < 0) {
         // we are just converting to a common merge join operator. The shuffle
         // join in map-reduce case.
+        /*
         int pos = 0; // it doesn't matter which position we use in this case.
         convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
+        */
         return null;
-      }*/
+      }
     MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
     // map join operator by default has no bucket cols
@@ -271,6 +274,7 @@ private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeSparkProcContext co
     mergeJoinOp.cloneOriginalParentsList(mergeJoinOp.getParentOperators());
   }
   */
+
   private void setAllChildrenTraitsToNull(Operator currentOp) {
     if (currentOp instanceof ReduceSinkOperator) {
       return;