Index: ql/src/test/results/clientpositive/join18.q.out
===================================================================
--- ql/src/test/results/clientpositive/join18.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/join18.q.out	(revision 0)
@@ -0,0 +1,542 @@
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF src1 key) key) (TOK_SELEXPR (TOK_FUNCTION count (TOK_COLREF src1 value)) value)) (TOK_GROUPBY (TOK_COLREF src1 key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src1 src2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF src2 key) key) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_COLREF src2 value)) value)) (TOK_GROUPBY (TOK_COLREF src2 key)))) b) (= (TOK_COLREF a key) (TOK_COLREF b key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF a key)) (TOK_SELEXPR (TOK_COLREF a value)) (TOK_SELEXPR (TOK_COLREF b key)) (TOK_SELEXPR (TOK_COLREF b value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2, Stage-5
+  Stage-4 is a root stage
+  Stage-5 depends on stages: Stage-4
+  Stage-3 depends on stages: Stage-2, Stage-5
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b:src2
+          Reduce Output Operator
+            key expressions:
+                  expr: key
+                  type: string
+                  expr: value
+                  type: string
+            sort order: ++
+            Map-reduce partition columns:
+                  expr: key
+                  type: string
+                  expr: value
+                  type: string
+            tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY.1)
+          keys:
+                expr: KEY.0
+                type: string
+          mode: partial1
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                name: binary_table
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        /tmp/hive-njain/73868771/373350713.10002
+          Reduce Output Operator
+            key expressions:
+                  expr: 0
+                  type: string
+            sort order: +
+            Map-reduce partition columns:
+                  expr: 0
+                  type: string
+            tag: -1
+            value expressions:
+                  expr: 1
+                  type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+          keys:
+                expr: KEY.0
+                type: string
+          mode: unknown
+          Select Operator
+            expressions:
+                  expr: 0
+                  type: string
+                  expr: 1
+                  type: bigint
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                  name: binary_table
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME1
+          Reduce Output Operator
+            key expressions:
+                  expr: 0
+                  type: string
+            sort order: +
+            Map-reduce partition columns:
+                  expr: 0
+                  type: string
+            tag: 0
+            value expressions:
+                  expr: 0
+                  type: string
+                  expr: 1
+                  type: bigint
+            Join Operator
+              condition map:
+                   Outer Join 0 to 1
+              condition expressions:
+                0 {VALUE.0} {VALUE.1}
+                1 {VALUE.0} {VALUE.1}
+              Select Operator
+                expressions:
+                      expr: 0
+                      type: string
+                      expr: 1
+                      type: bigint
+                      expr: 2
+                      type: string
+                      expr: 3
+                      type: bigint
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+        $INTNAME
+          Reduce Output Operator
+            key expressions:
+                  expr: 0
+                  type: string
+            sort order: +
+            Map-reduce partition columns:
+                  expr: 0
+                  type: string
+            tag: 1
+            value expressions:
+                  expr: 0
+                  type: string
+                  expr: 1
+                  type: bigint
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Outer Join 0 to 1
+          condition expressions:
+            0 {VALUE.0} {VALUE.1}
+            1 {VALUE.0} {VALUE.1}
+          Select Operator
+            expressions:
+                  expr: 0
+                  type: string
+                  expr: 1
+                  type: bigint
+                  expr: 2
+                  type: string
+                  expr: 3
+                  type: bigint
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a:src1
+          Reduce Output Operator
+            key expressions:
+                  expr: key
+                  type: string
+            sort order: +
+            Map-reduce partition columns:
+                  expr: rand()
+                  type: double
+            tag: -1
+            value expressions:
+                  expr: value
+                  type: string
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+          keys:
+                expr: KEY.0
+                type: string
+          mode: partial1
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                name: binary_table
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        /tmp/hive-njain/73868771/373350713.10004
+          Reduce Output Operator
+            key expressions:
+                  expr: 0
+                  type: string
+            sort order: +
+            Map-reduce partition columns:
+                  expr: 0
+                  type: string
+            tag: -1
+            value expressions:
+                  expr: 1
+                  type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE.0)
+          keys:
+                expr: KEY.0
+                type: string
+          mode: unknown
+          Select Operator
+            expressions:
+                  expr: 0
+                  type: string
+                  expr: 1
+                  type: bigint
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                  name: binary_table
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+NULL NULL 6
+0 3 NULL NULL
+10 1 NULL NULL
+100 2 NULL NULL
+103 2 NULL NULL
+104 2 NULL NULL
+105 1 NULL NULL
+11 1 NULL NULL
+111 1 NULL NULL
+113 2 NULL NULL
+114 1 NULL NULL
+116 1 NULL NULL
+118 2 NULL NULL
+119 3 NULL NULL
+12 2 NULL NULL
+120 2 NULL NULL
+125 2 NULL NULL
+126 1 NULL NULL
+128 3 128 0
+129 2 NULL NULL
+131 1 NULL NULL
+133 1 NULL NULL
+134 2 NULL NULL
+136 1 NULL NULL
+137 2 NULL NULL
+138 4 NULL NULL
+143 1 NULL NULL
+145 1 NULL NULL
+146 2 146 1
+149 2 NULL NULL
+15 2 NULL NULL
+150 1 150 1
+152 2 NULL NULL
+153 1 NULL NULL
+155 1 NULL NULL
+156 1 NULL NULL
+157 1 NULL NULL
+158 1 NULL NULL
+160 1 NULL NULL
+162 1 NULL NULL
+163 1 NULL NULL
+164 2 NULL NULL
+165 2 NULL NULL
+166 1 NULL NULL
+167 3 NULL NULL
+168 1 NULL NULL
+169 4 NULL NULL
+17 1 NULL NULL
+170 1 NULL NULL
+172 2 NULL NULL
+174 2 NULL NULL
+175 2 NULL NULL
+176 2 NULL NULL
+177 1 NULL NULL
+178 1 NULL NULL
+179 2 NULL NULL
+18 2 NULL NULL
+180 1 NULL NULL
+181 1 NULL NULL
+183 1 NULL NULL
+186 1 NULL NULL
+187 3 NULL NULL
+189 1 NULL NULL
+19 1 NULL NULL
+190 1 NULL NULL
+191 2 NULL NULL
+192 1 NULL NULL
+193 3 NULL NULL
+194 1 NULL NULL
+195 2 NULL NULL
+196 1 NULL NULL
+197 2 NULL NULL
+199 3 NULL NULL
+2 1 NULL NULL
+20 1 NULL NULL
+200 2 NULL NULL
+201 1 NULL NULL
+202 1 NULL NULL
+203 2 NULL NULL
+205 2 NULL NULL
+207 2 NULL NULL
+208 3 NULL NULL
+209 2 NULL NULL
+213 2 213 1
+214 1 NULL NULL
+216 2 NULL NULL
+217 2 NULL NULL
+218 1 NULL NULL
+219 2 NULL NULL
+221 2 NULL NULL
+222 1 NULL NULL
+223 2 NULL NULL
+224 2 224 0
+226 1 NULL NULL
+228 1 NULL NULL
+229 2 NULL NULL
+230 5 NULL NULL
+233 2 NULL NULL
+235 1 NULL NULL
+237 2 NULL NULL
+238 2 238 1
+239 2 NULL NULL
+24 2 NULL NULL
+241 1 NULL NULL
+242 2 NULL NULL
+244 1 NULL NULL
+247 1 NULL NULL
+248 1 NULL NULL
+249 1 NULL NULL
+252 1 NULL NULL
+255 2 255 1
+256 2 NULL NULL
+257 1 NULL NULL
+258 1 NULL NULL
+26 2 NULL NULL
+260 1 NULL NULL
+262 1 NULL NULL
+263 1 NULL NULL
+265 2 NULL NULL
+266 1 NULL NULL
+27 1 NULL NULL
+272 2 NULL NULL
+273 3 273 1
+274 1 NULL NULL
+275 1 NULL NULL
+277 4 NULL NULL
+278 2 278 1
+28 1 NULL NULL
+280 2 NULL NULL
+281 2 NULL NULL
+282 2 NULL NULL
+283 1 NULL NULL
+284 1 NULL NULL
+285 1 NULL NULL
+286 1 NULL NULL
+287 1 NULL NULL
+288 2 NULL NULL
+289 1 NULL NULL
+291 1 NULL NULL
+292 1 NULL NULL
+296 1 NULL NULL
+298 3 NULL NULL
+30 1 NULL NULL
+302 1 NULL NULL
+305 1 NULL NULL
+306 1 NULL NULL
+307 2 NULL NULL
+308 1 NULL NULL
+309 2 NULL NULL
+310 1 NULL NULL
+311 3 311 1
+315 1 NULL NULL
+316 3 NULL NULL
+317 2 NULL NULL
+318 3 NULL NULL
+321 2 NULL NULL
+322 2 NULL NULL
+323 1 NULL NULL
+325 2 NULL NULL
+327 3 NULL NULL
+33 1 NULL NULL
+331 2 NULL NULL
+332 1 NULL NULL
+333 2 NULL NULL
+335 1 NULL NULL
+336 1 NULL NULL
+338 1 NULL NULL
+339 1 NULL NULL
+34 1 NULL NULL
+341 1 NULL NULL
+342 2 NULL NULL
+344 2 NULL NULL
+345 1 NULL NULL
+348 5 NULL NULL
+35 3 NULL NULL
+351 1 NULL NULL
+353 2 NULL NULL
+356 1 NULL NULL
+360 1 NULL NULL
+362 1 NULL NULL
+364 1 NULL NULL
+365 1 NULL NULL
+366 1 NULL NULL
+367 2 NULL NULL
+368 1 NULL NULL
+369 3 369 0
+37 2 NULL NULL
+373 1 NULL NULL
+374 1 NULL NULL
+375 1 NULL NULL
+377 1 NULL NULL
+378 1 NULL NULL
+379 1 NULL NULL
+382 2 NULL NULL
+384 3 NULL NULL
+386 1 NULL NULL
+389 1 NULL NULL
+392 1 NULL NULL
+393 1 NULL NULL
+394 1 NULL NULL
+395 2 NULL NULL
+396 3 NULL NULL
+397 2 NULL NULL
+399 2 NULL NULL
+4 1 NULL NULL
+400 1 NULL NULL
+401 5 401 1
+402 1 NULL NULL
+403 3 NULL NULL
+404 2 NULL NULL
+406 4 406 1
+407 1 NULL NULL
+409 3 NULL NULL
+41 1 NULL NULL
+411 1 NULL NULL
+413 2 NULL NULL
+414 2 NULL NULL
+417 3 NULL NULL
+418 1 NULL NULL
+419 1 NULL NULL
+42 2 NULL NULL
+421 1 NULL NULL
+424 2 NULL NULL
+427 1 NULL NULL
+429 2 NULL NULL
+43 1 NULL NULL
+430 3 NULL NULL
+431 3 NULL NULL
+432 1 NULL NULL
+435 1 NULL NULL
+436 1 NULL NULL
+437 1 NULL NULL
+438 3 NULL NULL
+439 2 NULL NULL
+44 1 NULL NULL
+443 1 NULL NULL
+444 1 NULL NULL
+446 1 NULL NULL
+448 1 NULL NULL
+449 1 NULL NULL
+452 1 NULL NULL
+453 1 NULL NULL
+454 3 NULL NULL
+455 1 NULL NULL
+457 1 NULL NULL
+458 2 NULL NULL
+459 2 NULL NULL
+460 1 NULL NULL
+462 2 NULL NULL
+463 2 NULL NULL
+466 3 NULL NULL
+467 1 NULL NULL
+468 4 NULL NULL
+469 5 NULL NULL
+47 1 NULL NULL
+470 1 NULL NULL
+472 1 NULL NULL
+475 1 NULL NULL
+477 1 NULL NULL
+478 2 NULL NULL
+479 1 NULL NULL
+480 3 NULL NULL
+481 1 NULL NULL
+482 1 NULL NULL
+483 1 NULL NULL
+484 1 NULL NULL
+485 1 NULL NULL
+487 1 NULL NULL
+489 4 NULL NULL
+490 1 NULL NULL
+491 1 NULL NULL
+492 2 NULL NULL
+493 1 NULL NULL
+494 1 NULL NULL
+495 1 NULL NULL
+496 1 NULL NULL
+497 1 NULL NULL
+498 3 NULL NULL
+5 3 NULL NULL
+51 2 NULL NULL
+53 1 NULL NULL
+54 1 NULL NULL
+57 1 NULL NULL
+58 2 NULL NULL
+64 1 NULL NULL
+65 1 NULL NULL
+66 1 66 1
+67 2 NULL NULL
+69 1 NULL NULL
+70 3 NULL NULL
+72 2 NULL NULL
+74 1 NULL NULL
+76 2 NULL NULL
+77 1 NULL NULL
+78 1 NULL NULL
+8 1 NULL NULL
+80 1 NULL NULL
+82 1 NULL NULL
+83 2 NULL NULL
+84 2 NULL NULL
+85 1 NULL NULL
+86 1 NULL NULL
+87 1 NULL NULL
+9 1 NULL NULL
+90 3 NULL NULL
+92 1 NULL NULL
+95 2 NULL NULL
+96 1 NULL NULL
+97 2 NULL NULL
+98 2 98 1
Index: ql/src/test/queries/clientpositive/join18.q
===================================================================
--- ql/src/test/queries/clientpositive/join18.q	(revision 0)
+++ ql/src/test/queries/clientpositive/join18.q	(revision 0)
@@ -0,0 +1,24 @@
+EXPLAIN
+ SELECT a.key, a.value, b.key, b.value
+ FROM
+  (
+  SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key
+  ) a
+ FULL OUTER JOIN
+  (
+  SELECT src2.key as key, count(distinct(src2.value)) AS value
+  FROM src1 src2 group by src2.key
+  ) b
+ ON (a.key = b.key);
+
+ SELECT a.key, a.value, b.key, b.value
+ FROM
+  (
+  SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key
+  ) a
+ FULL OUTER JOIN
+  (
+  SELECT src2.key as key, count(distinct(src2.value)) AS value
+  FROM src1 src2 group by src2.key
+  ) b
+ ON (a.key = b.key);
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(revision 725929)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(working copy)
@@ -116,11 +116,19 @@
    * @param task for the old task for the current reducer
    * @param opProcCtx processing context
    */
-  public static void joinPlan(Task<? extends Serializable> task, GenMRProcContext opProcCtx) throws SemanticException {
+  public static void joinPlan(ReduceSinkOperator op,
+                              Task<? extends Serializable> oldTask,
+                              Task<? extends Serializable> task,
+                              GenMRProcContext opProcCtx) throws SemanticException {
     Task<? extends Serializable> currTask = task;
     mapredWork plan = (mapredWork) currTask.getWork();
     Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
+
+    // terminate the old task and make current task dependent on it
+    if (oldTask != null) {
+      mergeTasks(op, oldTask, currTask, opProcCtx);
+    }
+
     if (currTopOp != null) {
       List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps();
       String currAliasId = opProcCtx.getCurrAliasId();
@@ -132,6 +140,8 @@
       currTopOp = null;
       opProcCtx.setCurrTopOp(currTopOp);
     }
+
+    opProcCtx.setCurrTask(currTask);
   }
 
   /**
@@ -139,7 +149,8 @@
    * @param op the reduce sink operator encountered
    * @param opProcCtx processing context
    */
-  public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) {
+  public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
+    throws SemanticException {
     // Generate a new task
     mapredWork cplan = getMapRedWork();
     ParseContext parseCtx = opProcCtx.getParseCtx();
@@ -157,75 +168,9 @@
     HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
     opTaskMap.put(reducer, redTask);
-
-    // generate the temporary file
-    String scratchDir = opProcCtx.getScratchDir();
-    int randomid = opProcCtx.getRandomId();
-    int pathid = opProcCtx.getPathId();
-
-    String taskTmpDir = scratchDir + File.separator + randomid + '.' + pathid ;
-    pathid++;
-    opProcCtx.setPathId(pathid);
-
-    Operator<? extends Serializable> parent = op.getParentOperators().get(0);
-    tableDesc tt_desc =
-      PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
-
-    // Create a file sink operator for this file name
-    Operator<? extends Serializable> fs_op =
-      putOpInsertMap(OperatorFactory.get
-                     (new fileSinkDesc(taskTmpDir, tt_desc,
-                                       parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
-                      parent.getSchema()), null, parseCtx);
-
-    // replace the reduce child with this operator
-    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
-    for (int pos = 0; pos < childOpList.size(); pos++) {
-      if (childOpList.get(pos) == op) {
-        childOpList.set(pos, fs_op);
-        break;
-      }
-    }
-
-    List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
-    parentOpList.add(parent);
-    fs_op.setParentOperators(parentOpList);
-
-    // Add the path to alias mapping
-    if (cplan.getPathToAliases().get(taskTmpDir) == null) {
-      cplan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
-    }
-
-    String streamDesc;
-    if (reducer.getClass() == JoinOperator.class)
-      streamDesc = "$INTNAME";
-    else
-      streamDesc = taskTmpDir;
-
-    cplan.getPathToAliases().get(taskTmpDir).add(streamDesc);
-    cplan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
-    cplan.getAliasToWork().put(streamDesc, op);
-    setKeyAndValueDesc(cplan, op);
-
-    // Make this task dependent on the current task
     Task<? extends Serializable> currTask = opProcCtx.getCurrTask();
-    currTask.addDependentTask(redTask);
-
-    // TODO: Allocate work to remove the temporary files and make that
-    // dependent on the redTask
-    if (reducer.getClass() == JoinOperator.class)
-      cplan.setNeedsTagging(true);
-
-    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
-    String currAliasId = opProcCtx.getCurrAliasId();
-    currTopOp = null;
-    currAliasId = null;
-    currTask = redTask;
-
-    opProcCtx.setCurrTask(currTask);
-    opProcCtx.setCurrTopOp(currTopOp);
-    opProcCtx.setCurrAliasId(currAliasId);
+    mergeTasks(op, currTask, redTask, opProcCtx);
 
     opProcCtx.getRootOps().add(op);
   }
@@ -343,4 +288,95 @@
     parseCtx.getOpParseCtx().put(op, ctx);
     return op;
   }
+
+  @SuppressWarnings("nls")
+  /**
+   * Merge the tasks - by creating a temporary file between them.
+   * @param op reduce sink operator being processed
+   * @param oldTask the parent task
+   * @param task the child task
+   * @param opProcCtx context
+   **/
+  private static void mergeTasks(ReduceSinkOperator op,
+                                 Task<? extends Serializable> oldTask,
+                                 Task<? extends Serializable> task,
+                                 GenMRProcContext opProcCtx) throws SemanticException {
+    Task<? extends Serializable> currTask = task;
+    mapredWork plan = (mapredWork) currTask.getWork();
+    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
+
+    ParseContext parseCtx = opProcCtx.getParseCtx();
+    oldTask.addDependentTask(currTask);
+
+    // generate the temporary file
+    String scratchDir = opProcCtx.getScratchDir();
+    int randomid = opProcCtx.getRandomId();
+    int pathid = opProcCtx.getPathId();
+
+    String taskTmpDir = scratchDir + File.separator + randomid + '.' + pathid ;
+    pathid++;
+    opProcCtx.setPathId(pathid);
+
+    Operator<? extends Serializable> parent = op.getParentOperators().get(0);
+    tableDesc tt_desc =
+      PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
+
+    // Create a file sink operator for this file name
+    Operator<? extends Serializable> fs_op =
+      putOpInsertMap(OperatorFactory.get
+                     (new fileSinkDesc(taskTmpDir, tt_desc,
+                                       parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
+                      parent.getSchema()), null, parseCtx);
+
+    // replace the reduce child with this operator
+    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
+    for (int pos = 0; pos < childOpList.size(); pos++) {
+      if (childOpList.get(pos) == op) {
+        childOpList.set(pos, fs_op);
+        break;
+      }
+    }
+
+    List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
+    parentOpList.add(parent);
+    fs_op.setParentOperators(parentOpList);
+
+    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
+
+    String streamDesc;
+    mapredWork cplan = (mapredWork) currTask.getWork();
+
+    if (reducer.getClass() == JoinOperator.class) {
+      String origStreamDesc;
+      streamDesc = "$INTNAME";
+      origStreamDesc = streamDesc;
+      int pos = 0;
+      while (cplan.getAliasToWork().get(streamDesc) != null)
+        streamDesc = origStreamDesc.concat(String.valueOf(++pos));
+    }
+    else
+      streamDesc = taskTmpDir;
+
+    // Add the path to alias mapping
+    if (cplan.getPathToAliases().get(taskTmpDir) == null) {
+      cplan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
+    }
+
+    cplan.getPathToAliases().get(taskTmpDir).add(streamDesc);
+    cplan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
+    cplan.getAliasToWork().put(streamDesc, op);
+    setKeyAndValueDesc(cplan, op);
+
+    // TODO: Allocate work to remove the temporary files and make that
+    // dependent on the redTask
+    if (reducer.getClass() == JoinOperator.class)
+      cplan.setNeedsTagging(true);
+
+    currTopOp = null;
+    String currAliasId = null;
+
+    opProcCtx.setCurrTopOp(currTopOp);
+    opProcCtx.setCurrAliasId(currAliasId);
+    opProcCtx.setCurrTask(currTask);
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java	(revision 725929)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java	(working copy)
@@ -92,8 +92,9 @@
     // This will happen in case of joins. The current plan can be thrown away after being merged with the
     // original plan
     else {
-      GenMapRedUtils.joinPlan(opMapTask, ctx);
+      GenMapRedUtils.joinPlan(op, null, opMapTask, ctx);
       currTask = opMapTask;
+      ctx.setCurrTask(currTask);
     }
 
     mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java	(revision 725929)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java	(working copy)
@@ -67,8 +67,9 @@
     if (opMapTask == null)
       GenMapRedUtils.splitPlan(op, ctx);
     else {
-      GenMapRedUtils.joinPlan(opMapTask, ctx);
+      GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx);
       currTask = opMapTask;
+      ctx.setCurrTask(currTask);
     }
 
     mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java	(revision 725929)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java	(working copy)
@@ -331,9 +331,10 @@
     throws Exception {
     out.print(indentString(indent));
     out.println("STAGE PLANS:");
+    HashSet<Task<? extends Serializable>> displayedSet = new HashSet<Task<? extends Serializable>>();
     for(Task<? extends Serializable> rootTask: rootTasks) {
       outputPlan(rootTask, out, work.getExtended(),
-                 new HashSet<Task<? extends Serializable>>(), indent+2);
+                 displayedSet, indent+2);
     }
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 725929)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -3033,7 +3033,7 @@
     opRules.put(new RuleRegExp(new String("R0"), ".*"), new GenMROperator());
     opRules.put(new RuleRegExp(new String("R1"), "TS"), new GenMRTableScan1());
     opRules.put(new RuleRegExp(new String("R2"), "TS.*RS"), new GenMRRedSink1());
-    opRules.put(new RuleRegExp(new String("R3"), "RS.*RS"), new GenMRRedSink2());
+    opRules.put(new RuleRegExp(new String("R3"), ".*RS.*RS"), new GenMRRedSink2());
     opRules.put(new RuleRegExp(new String("R4"), ".*FS"), new GenMRFileSink1());
 
     // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along