Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1004121) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -186,7 +186,7 @@ TableDesc tt_desc; Operator rootOp; - if (mjCtx.getOldMapJoin() == null) { + if (mjCtx.getOldMapJoin() == null || setReducer) { taskTmpDir = mjCtx.getTaskTmpDir(); tt_desc = mjCtx.getTTDesc(); rootOp = mjCtx.getRootMapJoinOp(); Index: ql/src/test/queries/clientpositive/mapjoin_mapjoin.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (revision 0) +++ ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (revision 0) @@ -0,0 +1,5 @@ +explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key); + +explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds; + +select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds; Index: ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (revision 0) +++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (revision 0) @@ -0,0 +1,352 @@ +PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[value]] + 1 [Column[value]] + outputColumnNames: _col0 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src + TableScan + alias: src + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[value]] + 1 [Column[value]] + outputColumnNames: _col0 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/tmp/amarsri/hive_2010-10-03_23-32-11_193_4494089607373072455/-mr-10002 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {ds} + 1 + handleSkewJoin: false + keys: + 0 [Column[value]] + 1 [Column[value]] + outputColumnNames: _col0, _col2 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src + TableScan + alias: src + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {ds} + 1 + handleSkewJoin: false + keys: + 0 [Column[value]] + 1 [Column[value]] + outputColumnNames: _col0, _col2 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10002 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: string + outputColumnNames: _col0, _col2 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col2} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col2 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col2} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col2 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10003 + Select Operator + expressions: + expr: _col2 + type: string + outputColumnNames: _col2 + Select Operator + expressions: + expr: _col2 + type: string + outputColumnNames: _col2 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col2 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000 +POSTHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000 +5308 +5308