Index: ql/src/test/results/clientpositive/smb_mapjoin_14.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (working copy) @@ -160,7 +160,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -232,7 +232,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out (working copy) @@ -322,7 +322,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -394,7 +394,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -506,13 +506,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-9 depends on stages: Stage-1, Stage-5 , consists of Stage-10, Stage-11, Stage-2 - Stage-10 has a backup stage: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-5 , consists of Stage-10, Stage-11, Stage-3 + Stage-10 has a backup stage: Stage-3 Stage-7 depends on stages: Stage-10 - Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-11 has a backup stage: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-7, Stage-8 + Stage-11 has a backup stage: Stage-3 Stage-8 depends on stages: Stage-11 - Stage-2 + Stage-3 Stage-5 is a root stage Stage-0 is a root stage @@ -640,7 +640,7 @@ Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -722,7 +722,7 @@ Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: $INTNAME @@ -2695,7 +2695,7 @@ Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-3 depends on stages: Stage-1, Stage-4, Stage-5 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -2795,7 +2795,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -3067,13 +3067,13 @@ Stage-12 is a root stage , consists of Stage-18, Stage-19, Stage-1 Stage-18 has a backup stage: Stage-1 Stage-10 depends on stages: Stage-18 - Stage-9 depends on stages: Stage-1, Stage-5, Stage-10, Stage-11, Stage-13, Stage-14 , consists of Stage-16, Stage-17, Stage-2 - Stage-16 has a backup stage: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-5, Stage-10, Stage-11, Stage-13, Stage-14 , consists of Stage-16, Stage-17, Stage-3 + Stage-16 has a backup stage: Stage-3 Stage-7 depends on 
stages: Stage-16 - Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-17 has a backup stage: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-7, Stage-8 + Stage-17 has a backup stage: Stage-3 Stage-8 depends on stages: Stage-17 - Stage-2 + Stage-3 Stage-19 has a backup stage: Stage-1 Stage-11 depends on stages: Stage-19 Stage-1 @@ -3234,7 +3234,7 @@ Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -3316,7 +3316,7 @@ Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: $INTNAME Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out (working copy) @@ -73,7 +73,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -124,7 +124,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator @@ -271,7 +271,7 @@ Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -364,7 +364,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-8 Index: ql/src/test/results/clientpositive/smb_mapjoin_6.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_6.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/smb_mapjoin_6.q.out (working copy) @@ -1316,13 +1316,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -1363,10 +1363,10 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -1382,10 +1382,10 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1398,7 +1398,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1411,7 +1411,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs 
directory: true @@ -2858,13 +2858,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -2909,10 +2909,10 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -2928,10 +2928,10 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -2944,7 +2944,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -2957,7 +2957,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out (working copy) @@ -162,7 +162,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -234,7 +234,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -346,8 +346,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-1, Stage-5 + Stage-4 depends on stages: Stage-3 Stage-5 is a root stage Stage-0 is a root stage @@ -420,7 +420,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: $INTNAME @@ -476,7 +476,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (working copy) @@ -62,13 +62,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists 
of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -197,10 +197,10 @@ Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -228,11 +228,11 @@ name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -298,7 +298,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -364,7 +364,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/bucketmapjoin2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin2.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketmapjoin2.q.out (working copy) @@ -79,13 +79,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -225,10 +225,10 @@ Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -256,11 +256,11 @@ name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -326,7 +326,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -392,7 +392,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -1272,13 +1272,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -1423,10 +1423,10 
@@ Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -1459,11 +1459,11 @@ name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1544,7 +1544,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1625,7 +1625,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/smb_mapjoin9.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin9.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/smb_mapjoin9.q.out (working copy) @@ -247,14 +247,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-9 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-9 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-9 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-9 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -298,10 +298,10 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat name: default.smb_mapjoin9_results - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -324,10 +324,10 @@ name: smb_mapjoin9_results isExternal: false - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -339,7 +339,7 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat name: default.smb_mapjoin9_results - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -351,7 +351,7 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat name: default.smb_mapjoin9_results - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/smb_mapjoin_25.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_25.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_25.q.out (revision 0) @@ -0,0 +1,371 @@ +PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table 
smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: type: LOAD +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: load data local inpath '../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: type: LOAD +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: load data local inpath '../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: load data local inpath '../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: type: LOAD +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: load data local inpath '../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) 5)))) t1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) c) (TOK_TABREF (TOK_TABNAME smb_bucket_3) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL c) key) 5)))) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL t2) key) 5)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key = 5) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 0 + value expressions: + expr: key + type: int + t1:b + TableScan + alias: b + Filter Operator + predicate: + expr: (key = 5) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + value expressions: + expr: _col0 + type: int + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 1 + value expressions: + expr: _col0 + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (_col1 = 5) + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + t2:c + TableScan + alias: c + Filter Operator + predicate: + expr: (key = 5) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 0 + value expressions: + expr: key + type: int + t2:d + TableScan + alias: d + Filter Operator + predicate: + expr: (key = 5) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- explain +-- select * from smb_bucket_1 a left outer join smb_bucket_2 b on 
a.key = b.key left outer join src c on a.key=c.value + +-- select a.key from smb_bucket_1 a + +explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +PREHOOK: type: QUERY +POSTHOOK: query: -- explain +-- select * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join src c on a.key=c.value + +-- select a.key from smb_bucket_1 a + +explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) 5)))) t1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) c) (TOK_TABREF (TOK_TABNAME smb_bucket_3) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL c) key) 5)))) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL t2) key) 5)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key = 5) + type: boolean + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + value expressions: + expr: _col0 + type: int + t2:c + TableScan + alias: c + Filter Operator + predicate: + expr: (key = 5) + type: boolean + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 1 + value expressions: + expr: _col0 + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (_col1 = 5) + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### Index: ql/src/test/results/clientpositive/smb_mapjoin_11.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_11.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/smb_mapjoin_11.q.out (working copy) @@ -61,7 +61,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -198,7 +198,7 @@ name: default.test_table3 #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 
Stats-Aggr Operator #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out (working copy) @@ -78,7 +78,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -129,7 +129,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator @@ -454,7 +454,7 @@ Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -555,7 +555,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-8 @@ -1028,7 +1028,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1079,7 +1079,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator @@ -1251,7 +1251,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1302,7 +1302,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator @@ -1513,7 +1513,7 @@ Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -1614,7 +1614,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-8 Index: ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out (working copy) @@ -71,9 +71,21 @@ POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on a different key + +-- The three tests below are all the same query with different aliases, which changes the dispatch order of GenMapRedWalker +-- This depends on the iteration order of HashMap, so it can be meaningless on a non-Sun JDK +-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- c = TS[1]-RS[7]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value PREHOOK: type: QUERY POSTHOOK: query: -- A SMB join is being followed by a regular
join on a non-bucketed table on a different key + +-- The three tests below are all the same query with different aliases, which changes the dispatch order of GenMapRedWalker +-- This depends on the iteration order of HashMap, so it can be meaningless on a non-Sun JDK +-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- c = TS[1]-RS[7]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value POSTHOOK: type: QUERY POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -88,12 +100,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a @@ -154,7 +166,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -209,6 +221,292 @@ POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 2654 +PREHOOK: query: -- d = TS[0]-RS[7]-JOIN[8]-SEL[9]-FS[10] +-- b = TS[1]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- d = TS[0]-RS[7]-JOIN[8]-SEL[9]-FS[10] +-- b = TS[1]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) d) (= (. (TOK_TABLE_OR_COL d) value) (. 
(TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col1 + Position of Big Table: 0 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 0 + d + TableScan + alias: d + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +2654 +PREHOOK: query: -- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- a 
= TS[1]-MAPJOIN[11] +-- h = TS[2]-RS[7]-JOIN[8] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- a = TS[1]-MAPJOIN[11] +-- h = TS[2]-RS[7]-JOIN[8] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) h) (= (. (TOK_TABLE_OR_COL h) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-3 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col1 + Position of Big Table: 0 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 0 + h + TableScan + alias: h + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +2654 PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on the same key explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key PREHOOK: type: QUERY @@ -703,12 +1001,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME tbl4) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a @@ -769,7 +1067,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -842,12 +1140,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL c) value) (. 
(TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a @@ -908,7 +1206,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1746,12 +2044,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME tbl4) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a @@ -1812,7 +2110,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/bucketmapjoin4.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin4.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketmapjoin4.q.out (working copy) @@ -96,13 +96,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -239,10 +239,10 @@ Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -270,11 +270,11 @@ name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -340,7 +340,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -406,7 +406,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out (revision 1505233) +++ 
ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out (working copy) @@ -73,7 +73,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -124,7 +124,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator @@ -240,7 +240,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -291,7 +291,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Index: ql/src/test/results/clientpositive/bucketmapjoin1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin1.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketmapjoin1.q.out (working copy) @@ -338,13 +338,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -481,10 +481,10 @@ Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -512,11 +512,11 @@ name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -582,7 +582,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -648,7 +648,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out (working copy) @@ -75,7 +75,7 @@ Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -168,7 +168,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-8 Index: ql/src/test/results/clientpositive/mapjoin_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_distinct.q.out (revision 1505233) +++ 
ql/src/test/results/clientpositive/mapjoin_distinct.q.out (working copy) @@ -16,7 +16,7 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -96,7 +96,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -315,7 +315,7 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -388,7 +388,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/bucketmapjoin3.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin3.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketmapjoin3.q.out (working copy) @@ -96,13 +96,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -242,10 +242,10 @@ Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -273,11 +273,11 @@ name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -343,7 +343,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -409,7 +409,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/smb_mapjoin_12.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_12.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/smb_mapjoin_12.q.out (working copy) @@ -81,7 +81,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -220,7 +220,7 @@ name: default.test_table3 #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### @@ -306,7 +306,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + 
Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -455,7 +455,7 @@ name: default.test_table3 #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out (working copy) @@ -75,7 +75,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -128,7 +128,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out (working copy) @@ -111,7 +111,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -160,7 +160,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator @@ -290,7 +290,7 @@ Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -383,7 +383,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-8 @@ -665,7 +665,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -714,7 +714,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator Index: ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (working copy) @@ -69,13 +69,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -212,10 +212,10 @@ Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -243,11 +243,11 @@ 
name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -313,7 +313,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -379,7 +379,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/stats11.q.out =================================================================== --- ql/src/test/results/clientpositive/stats11.q.out (revision 1505233) +++ ql/src/test/results/clientpositive/stats11.q.out (working copy) @@ -283,13 +283,13 @@ STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -426,10 +426,10 @@ Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -457,11 +457,11 @@ name: default.bucketmapjoin_tmp_result #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -527,7 +527,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -593,7 +593,7 @@ Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true Index: ql/src/test/queries/clientpositive/smb_mapjoin_25.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_25.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_25.q (revision 0) @@ -0,0 +1,41 @@ +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.exec.max.dynamic.partitions.pernode=1000000; +set hive.exec.max.dynamic.partitions=1000000; +set hive.exec.max.created.files=1000000; +set hive.map.aggr=true; + +create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; +create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; +create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; + +load data local inpath '../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; +load data local inpath '../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; +load data local inpath '../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; + +explain +select * from (select a.key 
from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5; + +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge=true; +set hive.mapred.reduce.tasks.speculative.execution=false; +set hive.auto.convert.join=true; +set hive.auto.convert.sortmerge.join=true; +set hive.auto.convert.sortmerge.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000000000; +set hive.optimize.reducededuplication.min.reducer=1; +set hive.optimize.mapjoin.mapreduce=true; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ; + +-- explain +-- select * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join src c on a.key=c.value + +-- select a.key from smb_bucket_1 a + +explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5; + +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5; + Index: ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q =================================================================== --- ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q (revision 1505233) +++ ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q (working copy) @@ -19,10 +19,29 @@ set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=200; set hive.auto.convert.sortmerge.join.to.mapjoin=false; + -- A SMB join is being followed by a regular join on a non-bucketed table on a different key + +-- The three tests below are all the same query with different aliases, which changes the dispatch order of GenMapRedWalker +-- This depends on the iteration order of HashMap, so it can be meaningless on a non-Sun JDK +-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- c = TS[1]-RS[7]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value; select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value; +-- d = TS[0]-RS[7]-JOIN[8]-SEL[9]-FS[10] +-- b = TS[1]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value; +select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value; + +-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- a = TS[1]-MAPJOIN[11] +-- h = TS[2]-RS[7]-JOIN[8] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value; +select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value; + -- A SMB join is being followed by a regular join on a non-bucketed table on the same key explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key; select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key; Index:
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (working copy) @@ -84,7 +84,7 @@ if (currOp == op) { String currAliasId = alias; ctx.setCurrAliasId(currAliasId); - mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId)); + mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId)); QBParseInfo parseInfo = parseCtx.getQB().getParseInfo(); if (parseInfo.isAnalyzeCommand()) { @@ -139,12 +139,12 @@ Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias); PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, new HashSet<Partition>(), null); - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx, partList); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx, partList); } else { // non-partitioned table - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx); } } - return null; + return true; } } assert false; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (working copy) @@ -57,28 +57,32 @@ .getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); - Operator<? extends OperatorDesc> currTopOp = mapredCtx.getCurrTopOp(); String currAliasId = mapredCtx.getCurrAliasId(); Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0); Map<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx .getOpTaskMap(); - Task<? extends Serializable> opMapTask = opTaskMap.get(reducer); + Task<? extends Serializable> oldTask = opTaskMap.get(reducer); - ctx.setCurrTopOp(currTopOp); ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); - if (opMapTask == null) { + if (oldTask == null) { GenMapRedUtils.splitPlan(op, ctx); } else { - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true); - currTask = opMapTask; + GenMapRedUtils.splitPlan(op, currTask, oldTask, ctx); + currTask = oldTask; ctx.setCurrTask(currTask); } - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId())); - return null; + + if (GenMapRedUtils.hasBranchFinished(nodeOutputs)) { + ctx.addRootIfPossible(currTask); + return false; + } + + return true; } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -91,6 +91,8 @@ ParseContext parseCtx = ctx.getParseCtx(); boolean chDir = false; Task<? extends Serializable> currTask = ctx.getCurrTask(); + ctx.addRootIfPossible(currTask); + FileSinkOperator fsOp = (FileSinkOperator) nd; boolean isInsertTable = // is INSERT OVERWRITE TABLE fsOp.getConf().getTableInfo().getTableName() != null && @@ -106,7 +108,7 @@ if (fileSinkDescs != null) { Task<? extends Serializable> childTask = fileSinkDescs.get(fsOp.getConf()); processLinkedFileDesc(ctx, childTask); - return null; + return true; } // Has the user enabled merging of files for map-only jobs or
for all jobs @@ -181,7 +183,7 @@ } } - return null; + return true; } /* @@ -189,26 +191,12 @@ * Use the task created by the first linked file descriptor */ private void processLinkedFileDesc(GenMRProcContext ctx, - Task<? extends Serializable> childTask) - throws SemanticException { - Operator<? extends OperatorDesc> currTopOp = ctx.getCurrTopOp(); - String currAliasId = ctx.getCurrAliasId(); - List<Operator<? extends OperatorDesc>> seenOps = ctx.getSeenOps(); - List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks(); + Task<? extends Serializable> childTask) throws SemanticException { Task<? extends Serializable> currTask = ctx.getCurrTask(); - - if (currTopOp != null) { - if (!seenOps.contains(currTopOp)) { - seenOps.add(currTopOp); - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, - (MapredWork) currTask.getWork(), false, ctx); - } - - if (!rootTasks.contains(currTask) - && (currTask.getParentTasks() == null - || currTask.getParentTasks().isEmpty())) { - rootTasks.add(currTask); - } + Operator<? extends OperatorDesc> currTopOp = ctx.getCurrTopOp(); + if (currTopOp != null && !ctx.isSeenOp(currTask, currTopOp)) { + String currAliasId = ctx.getCurrAliasId(); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx); } if (childTask != null) { @@ -702,8 +690,6 @@ String currAliasId = ctx.getCurrAliasId(); HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap(); - List<Operator<? extends OperatorDesc>> seenOps = ctx.getSeenOps(); - List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task if (mvTask != null) { @@ -717,22 +703,13 @@ if (currTopOp != null) { Task<? extends Serializable> mapTask = opTaskMap.get(null); if (mapTask == null) { - if (!seenOps.contains(currTopOp)) { - seenOps.add(currTopOp); - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, - (MapredWork) currTask.getWork(), false, ctx); + if (!ctx.isSeenOp(currTask, currTopOp)) { + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx); } opTaskMap.put(null, currTask); - if (!rootTasks.contains(currTask) - && (currTask.getParentTasks() == null - || currTask.getParentTasks().isEmpty())) { - rootTasks.add(currTask); - } } else { - if (!seenOps.contains(currTopOp)) { - seenOps.add(currTopOp); - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, - (MapredWork) mapTask.getWork(), false, ctx); + if (!ctx.isSeenOp(currTask, currTopOp)) { + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, mapTask, false, ctx); } else { UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -263,7 +263,8 @@ } catch (Exception e) { e.printStackTrace(); - throw new SemanticException("Generate New MapJoin Opertor Exeception " + e.getMessage()); + throw new SemanticException("Failed to generate new MapJoin operator " + + "by exception: " + e.getMessage()); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy) @@ -55,7 +55,6 @@ */ public static class GenMapRedCtx { Task<? extends Serializable> currTask; - Operator<? extends OperatorDesc> currTopOp; String currAliasId; public GenMapRedCtx() { @@ -64,15 +63,10 @@ /** * @param currTask * the current task - * @param currTopOp - * the current top operator being traversed * @param currAliasId - * the
current alias for the top operator */ - public GenMapRedCtx(Task<? extends Serializable> currTask, - Operator<? extends OperatorDesc> currTopOp, String currAliasId) { + public GenMapRedCtx(Task<? extends Serializable> currTask, String currAliasId) { this.currTask = currTask; - this.currTopOp = currTopOp; this.currAliasId = currAliasId; } @@ -84,13 +78,6 @@ } /** - * @return current top operator - */ - public Operator<? extends OperatorDesc> getCurrTopOp() { - return currTopOp; - } - - /** * @return current alias */ public String getCurrAliasId() { @@ -103,13 +90,13 @@ * */ public static class GenMRUnionCtx { - Task<? extends Serializable> uTask; + final Task<? extends Serializable> uTask; List<String> taskTmpDir; List<TableDesc> tt_desc; List<Operator<? extends OperatorDesc>> listTopOperators; - public GenMRUnionCtx() { - uTask = null; + public GenMRUnionCtx(Task<? extends Serializable> uTask) { + this.uTask = uTask; taskTmpDir = new ArrayList<String>(); tt_desc = new ArrayList<TableDesc>(); listTopOperators = new ArrayList<Operator<? extends OperatorDesc>>(); } @@ -119,10 +106,6 @@ return uTask; } - public void setUTask(Task<? extends Serializable> uTask) { - this.uTask = uTask; - } - public void addTaskTmpDir(String taskTmpDir) { this.taskTmpDir.add(taskTmpDir); } @@ -156,8 +139,10 @@ private HiveConf conf; private HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap; + private + HashMap<Task<? extends Serializable>, List<Operator<? extends OperatorDesc>>> taskToSeenOps; + private HashMap<UnionOperator, GenMRUnionCtx> unionTaskMap; - private List<Operator<? extends OperatorDesc>> seenOps; private List<FileSinkOperator> seenFileSinkOps; private ParseContext parseCtx; @@ -169,7 +154,6 @@ private Operator<? extends OperatorDesc> currTopOp; private UnionOperator currUnionOp; private String currAliasId; - private List<Operator<? extends OperatorDesc>> rootOps; private DependencyCollectionTask dependencyTaskForMultiInsert; // If many fileSinkDescs are linked to each other, it is a good idea to keep track of @@ -213,14 +197,13 @@ public GenMRProcContext( HiveConf conf, HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap, - List<Operator<? extends OperatorDesc>> seenOps, ParseContext parseCtx, + ParseContext parseCtx, List<Task<MoveWork>> mvTask, List<Task<? extends Serializable>> rootTasks, LinkedHashMap<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) { this.conf = conf; this.opTaskMap = opTaskMap; - this.seenOps = seenOps; this.mvTask = mvTask; this.parseCtx = parseCtx; this.rootTasks = rootTasks; @@ -231,9 +214,9 @@ currTopOp = null; currUnionOp = null; currAliasId = null; - rootOps = new ArrayList<Operator<? extends OperatorDesc>>(); - rootOps.addAll(parseCtx.getTopOps().values()); unionTaskMap = new HashMap<UnionOperator, GenMRUnionCtx>(); + taskToSeenOps = new HashMap<Task<? extends Serializable>, + List<Operator<? extends OperatorDesc>>>(); dependencyTaskForMultiInsert = null; linkedFileDescTasks = null; } @@ -255,13 +238,19 @@ this.opTaskMap = opTaskMap; } - /** - * @return operators already visited - */ - public List<Operator<? extends OperatorDesc>> getSeenOps() { - return seenOps; + public boolean isSeenOp(Task task, Operator operator) { + List<Operator<? extends OperatorDesc>> seenOps = taskToSeenOps.get(task); + return seenOps != null && seenOps.contains(operator); } + public void addSeenOp(Task task, Operator operator) { + List<Operator<? extends OperatorDesc>> seenOps = taskToSeenOps.get(task); + if (seenOps == null) { + taskToSeenOps.put(task, seenOps = new ArrayList<Operator<? extends OperatorDesc>>()); + } + seenOps.add(operator); + } + /** * @return file operators already visited */ @@ -270,14 +259,6 @@ } /** - * @param seenOps - * operators already visited - */ - public void setSeenOps(List<Operator<? extends OperatorDesc>> seenOps) { - this.seenOps = seenOps; - } - - /** * @param seenFileSinkOps * file sink operators already visited */ @@ -286,21 +267,6 @@ } /** - * @return top operators for tasks - */ - public List<Operator<? extends OperatorDesc>> getRootOps() { - return rootOps; - } - - /** - * @param rootOps - * top operators for tasks - */ - public void setRootOps(List<Operator<? extends OperatorDesc>> rootOps) { - this.rootOps = rootOps; - } - - /** * @return current parse context */ public ParseContext getParseCtx() { @@ -345,6 +311,15 @@ this.rootTasks = rootTasks; } + public boolean addRootIfPossible(Task<? extends Serializable> task) { + if (task.getParentTasks() == null || task.getParentTasks().isEmpty()) { + if (!rootTasks.contains(task)) { + return
rootTasks.add(task); + } + } + return false; + } + /** * @return operator to task mappings */ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (working copy) @@ -82,14 +82,13 @@ UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); ctx.getMapCurrCtx().put( (Operator<? extends OperatorDesc>) union, - new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId())); // if the union is the first time seen, set current task to GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); if (uCtxTask == null) { - uCtxTask = new GenMRUnionCtx(); - uCtxTask.setUTask(ctx.getCurrTask()); + uCtxTask = new GenMRUnionCtx(ctx.getCurrTask()); ctx.setUnionTask(union, uCtxTask); } @@ -101,7 +100,7 @@ } } - return null; + return true; } /** @@ -192,14 +191,11 @@ // The current plan can be thrown away after being merged with the union // plan Task<? extends Serializable> uTask = uCtxTask.getUTask(); - MapredWork plan = (MapredWork) uTask.getWork(); ctx.setCurrTask(uTask); - List<Operator<? extends OperatorDesc>> seenOps = ctx.getSeenOps(); Operator<? extends OperatorDesc> topOp = ctx.getCurrTopOp(); - if (!seenOps.contains(topOp) && topOp != null) { - seenOps.add(topOp); + if (topOp != null && !ctx.isSeenOp(uTask, topOp)) { GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx - .getCurrTopOp(), plan, false, ctx); + .getCurrTopOp(), uTask, false, ctx); } } @@ -230,8 +226,7 @@ // All inputs of this UnionOperator are in the same Reducer. // We do not need to break the operator tree. mapCurrCtx.put((Operator<? extends OperatorDesc>) nd, - new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); + new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId())); return null; } @@ -255,10 +250,9 @@ // union is encountered for the first time GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); if (uCtxTask == null) { - uCtxTask = new GenMRUnionCtx(); uPlan = GenMapRedUtils.getMapRedWork(parseCtx); uTask = TaskFactory.get(uPlan, parseCtx.getConf()); - uCtxTask.setUTask(uTask); + uCtxTask = new GenMRUnionCtx(uTask); ctx.setUnionTask(union, uCtxTask); } else { @@ -293,9 +287,9 @@ ctx.setCurrTask(uTask); mapCurrCtx.put((Operator<? extends OperatorDesc>) nd, - new GenMapRedCtx(ctx.getCurrTask(), null, null)); + new GenMapRedCtx(ctx.getCurrTask(), null)); - return null; + return true; } private boolean shouldBeRootTask( Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (working copy) @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; -import java.util.HashMap; import java.util.Map; import java.util.Stack; @@ -64,19 +63,20 @@ GenMapRedCtx mapredCtx = mapCurrCtx.get(stack.get(stack.size() - 2)); Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); MapredWork currPlan = (MapredWork) currTask.getWork(); - Operator<? extends OperatorDesc> currTopOp = mapredCtx.getCurrTopOp(); String currAliasId = mapredCtx.getCurrAliasId(); + + if (op.getNumChild() != 1) { + throw new IllegalStateException("Expecting operator " + op + " to have one child.
" + + "But found multiple children: " + op.getChildOperators()); + } Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0); - HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx - .getOpTaskMap(); - Task<? extends Serializable> opMapTask = opTaskMap.get(reducer); + Task<? extends Serializable> oldTask = ctx.getOpTaskMap().get(reducer); - ctx.setCurrTopOp(currTopOp); ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { + if (oldTask == null) { if (currPlan.getReducer() == null) { GenMapRedUtils.initPlan(op, ctx); } else { @@ -85,14 +85,18 @@ } else { // This will happen in case of joins. The current plan can be thrown away // after being merged with the original plan - GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false); - currTask = opMapTask; + GenMapRedUtils.joinPlan(currTask, oldTask, ctx); + currTask = oldTask; ctx.setCurrTask(currTask); } - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); - return null; + mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId())); + + if (GenMapRedUtils.hasBranchFinished(nodeOutputs)) { + ctx.addRootIfPossible(currTask); + return false; + } + + return true; } - } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java (working copy) @@ -101,11 +101,11 @@ ctx.setCurrTask(reducerTask); } - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId())); // the union operator has been processed ctx.setCurrUnionOp(null); - return null; + return true; } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecDriver; +import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -60,7 +61,6 @@ import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -107,25 +107,16 @@ plan.setNumReduceTasks(desc.getNumReducers()); - List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks(); - - if (!rootTasks.contains(currTask) - && (currTask.getParentTasks() == null - || currTask.getParentTasks().isEmpty())) { - rootTasks.add(currTask); - } if (reducer.getClass() == JoinOperator.class || reducer.getClass() == DemuxOperator.class) { plan.setNeedsTagging(true); } assert currTopOp != null; - List<Operator<? extends OperatorDesc>> seenOps = opProcCtx.getSeenOps(); String currAliasId = opProcCtx.getCurrAliasId(); - if (!seenOps.contains(currTopOp)) { - seenOps.add(currTopOp); - 
setTaskPlan(currAliasId, currTopOp, plan, false, opProcCtx); + if (!opProcCtx.isSeenOp(currTask, currTopOp)) { + setTaskPlan(currAliasId, currTopOp, currTask, false, opProcCtx); } currTopOp = null; @@ -169,16 +160,14 @@ } private static void setUnionPlan(GenMRProcContext opProcCtx, - boolean local, MapredWork plan, GenMRUnionCtx uCtx, + boolean local, Task<? extends Serializable> currTask, GenMRUnionCtx uCtx, boolean mergeTask) throws SemanticException { Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp(); if (currTopOp != null) { - List<Operator<? extends OperatorDesc>> seenOps = opProcCtx.getSeenOps(); String currAliasId = opProcCtx.getCurrAliasId(); - if (!seenOps.contains(currTopOp) || mergeTask) { - seenOps.add(currTopOp); - setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + if (mergeTask || !opProcCtx.isSeenOp(currTask, currTopOp)) { + setTaskPlan(currAliasId, currTopOp, currTask, local, opProcCtx); } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); @@ -194,6 +183,7 @@ List<Operator<? extends OperatorDesc>> topOperators = uCtx.getListTopOperators(); + MapredWork plan = (MapredWork) currTask.getWork(); for (int pos = 0; pos < size; pos++) { String taskTmpDir = taskTmpDirLst.get(pos); TableDesc tt_desc = tt_descLst.get(pos); @@ -217,14 +207,12 @@ public static void initUnionPlan(GenMRProcContext opProcCtx, UnionOperator currUnionOp, Task<? extends Serializable> currTask, boolean local) throws SemanticException { - MapredWork plan = (MapredWork) currTask.getWork(); // In case of lateral views followed by a join, the same tree // can be traversed more than once if (currUnionOp != null) { GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp); assert uCtx != null; - setUnionPlan(opProcCtx, local, plan, uCtx, false); + setUnionPlan(opProcCtx, local, currTask, uCtx, false); } } @@ -236,12 +224,11 @@ Task<? extends Serializable> currentUnionTask, Task<? extends Serializable> existingTask, boolean local) throws SemanticException { - MapredWork plan = (MapredWork) existingTask.getWork(); assert currUnionOp != null; GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp); assert uCtx != null; - setUnionPlan(opProcCtx, local, plan, uCtx, true); + setUnionPlan(opProcCtx, local, existingTask, uCtx, true); List<Task<? extends Serializable>> parTasks = null; if (opProcCtx.getRootTasks().contains(currentUnionTask)) { @@ -276,104 +263,105 @@ } /** - * Merge the current task with the task for the current reducer.
+ * Merge the current task into the old task for the reducer. * - * @param op - * operator being processed + * @param currTask + * the current task for the current reducer * @param oldTask * the old task for the current reducer - * @param task - * the current task for the current reducer * @param opProcCtx * processing context - * @param pos - * position of the parent in the stack */ - public static void joinPlan(Operator<? extends OperatorDesc> op, - Task<? extends Serializable> oldTask, Task<? extends Serializable> task, - GenMRProcContext opProcCtx, int pos, boolean split) + public static void joinPlan(Task<? extends Serializable> currTask, + Task<? extends Serializable> oldTask, GenMRProcContext opProcCtx) throws SemanticException { - Task<? extends Serializable> currTask = task; - MapredWork plan = (MapredWork) currTask.getWork(); + assert currTask != null && oldTask != null; + Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp(); List<Task<? extends Serializable>> parTasks = null; - // terminate the old task and make current task dependent on it - if (split) { - assert oldTask != null; - splitTasks(op, oldTask, currTask, opProcCtx, true, false, 0); - } else { - if ((oldTask != null) && (oldTask.getParentTasks() != null) - && !oldTask.getParentTasks().isEmpty()) { - parTasks = new ArrayList<Task<? extends Serializable>>(); - parTasks.addAll(oldTask.getParentTasks()); + if (currTask.getParentTasks() != null + && !currTask.getParentTasks().isEmpty()) { + parTasks = new ArrayList<Task<? extends Serializable>>(); + parTasks.addAll(currTask.getParentTasks()); - Object[] parTaskArr = parTasks.toArray(); - for (Object element : parTaskArr) { - ((Task<? extends Serializable>) element).removeDependentTask(oldTask); - } + Object[] parTaskArr = parTasks.toArray(); + for (Object element : parTaskArr) { + ((Task<? extends Serializable>) element).removeDependentTask(currTask); } } if (currTopOp != null) { - List<Operator<? extends OperatorDesc>> seenOps = opProcCtx.getSeenOps(); - String currAliasId = opProcCtx.getCurrAliasId(); - - if (!seenOps.contains(currTopOp)) { - seenOps.add(currTopOp); - boolean local = false; - if (pos != -1) { - local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ? false - : true; - } - setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - } - currTopOp = null; - opProcCtx.setCurrTopOp(currTopOp); + mergeInput(currTopOp, opProcCtx, oldTask, false); } - if ((oldTask != null) && (parTasks != null)) { + if (parTasks != null) { for (Task<? extends Serializable> parTask : parTasks) { - parTask.addDependentTask(currTask); - if (opProcCtx.getRootTasks().contains(currTask)) { - opProcCtx.getRootTasks().remove(currTask); - } + parTask.addDependentTask(oldTask); } } - opProcCtx.setCurrTask(currTask); + if (oldTask instanceof MapRedTask && currTask instanceof MapRedTask) { + ((MapRedTask) currTask).getWork().mergingInto(((MapRedTask) oldTask).getWork()); + } + + opProcCtx.setCurrTopOp(null); + opProcCtx.setCurrTask(oldTask); } /** - * Split the current plan by creating a temporary destination.
+ * If currTopOp has not yet been added as an input of the task, add it to the task + */ + static boolean mergeInput(Operator<? extends OperatorDesc> currTopOp, + GenMRProcContext opProcCtx, Task<? extends Serializable> task, boolean local) + throws SemanticException { + if (!opProcCtx.isSeenOp(task, currTopOp)) { + String currAliasId = opProcCtx.getCurrAliasId(); + setTaskPlan(currAliasId, currTopOp, task, local, opProcCtx); + return true; + } + return false; + } + + /** + * Met cRS in pRS(parentTask)-cRS-OP(childTask) case + * Split and link the two tasks by a temporary file: pRS-FS / TS-cRS-OP + */ + static void splitPlan(ReduceSinkOperator cRS, + Task<? extends Serializable> parentTask, Task<? extends Serializable> childTask, + GenMRProcContext opProcCtx) throws SemanticException { + assert parentTask != null && childTask != null; + splitTasks(cRS, parentTask, childTask, opProcCtx); + } + + /** + * Met cRS in pOP(parentTask with RS)-cRS-cOP(noTask) case + * Create a new child task for cRS-cOP and link the two tasks by a temporary file: pOP-FS / TS-cRS-cOP * - * @param op + * @param cRS * the reduce sink operator encountered * @param opProcCtx * processing context */ - public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) + static void splitPlan(ReduceSinkOperator cRS, GenMRProcContext opProcCtx) throws SemanticException { // Generate a new task ParseContext parseCtx = opProcCtx.getParseCtx(); - MapredWork cplan = getMapRedWork(parseCtx); - Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx + Task<? extends Serializable> parentTask = opProcCtx.getCurrTask(); + + MapredWork childPlan = getMapRedWork(parseCtx); + Task<? extends Serializable> childTask = TaskFactory.get(childPlan, parseCtx .getConf()); - Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0); + Operator<? extends OperatorDesc> reducer = cRS.getChildOperators().get(0); // Add the reducer - cplan.setReducer(reducer); ReduceSinkDesc desc = op.getConf(); + childPlan.setReducer(reducer); + ReduceSinkDesc desc = cRS.getConf(); + childPlan.setNumReduceTasks(new Integer(desc.getNumReducers())); - cplan.setNumReduceTasks(new Integer(desc.getNumReducers())); + opProcCtx.getOpTaskMap().put(reducer, childTask); - HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = - opProcCtx.getOpTaskMap(); - opTaskMap.put(reducer, redTask); - Task<? extends Serializable> currTask = opProcCtx.getCurrTask(); - - splitTasks(op, currTask, redTask, opProcCtx, true, false, 0); - opProcCtx.getRootOps().add(op); + splitTasks(cRS, parentTask, childTask, opProcCtx); } /** @@ -391,9 +379,9 @@ * processing context */ public static void setTaskPlan(String alias_id, - Operator<? extends OperatorDesc> topOp, MapredWork plan, boolean local, + Operator<? extends OperatorDesc> topOp, Task<? extends Serializable> task, boolean local, GenMRProcContext opProcCtx) throws SemanticException { - setTaskPlan(alias_id, topOp, plan, local, opProcCtx, null); + setTaskPlan(alias_id, topOp, task, local, opProcCtx, null); } private static ReadEntity getParentViewInfo(String alias_id, @@ -435,8 +423,9 @@ * pruned partition list. If it is null it will be computed on-the-fly. */ public static void setTaskPlan(String alias_id, - Operator<? extends OperatorDesc> topOp, MapredWork plan, boolean local, + Operator<? extends OperatorDesc> topOp, Task<? extends Serializable> task, boolean local, GenMRProcContext opProcCtx, PrunedPartitionList pList) throws SemanticException { + MapredWork plan = (MapredWork) task.getWork(); ParseContext parseCtx = opProcCtx.getParseCtx(); Set<ReadEntity> inputs = opProcCtx.getInputs(); @@ -684,6 +673,7 @@ } plan.setMapLocalWork(localPlan); } + opProcCtx.addSeenOp(task, topOp); } /** @@ -854,20 +844,20 @@ @SuppressWarnings("nls") /** - * Merge the tasks - by creating a temporary file between them. + * Split two tasks by creating a temporary file between them.
+ * + * @param op reduce sink operator being processed - * @param oldTask the parent task - * @param task the child task + * @param parentTask the parent task + * @param childTask the child task * @param opProcCtx context - * @param setReducer does the reducer needs to be set - * @param pos position of the parent **/ - public static void splitTasks(Operator<? extends OperatorDesc> op, - Task<? extends Serializable> parentTask, - Task<? extends Serializable> childTask, GenMRProcContext opProcCtx, - boolean setReducer, boolean local, int posn) throws SemanticException { - childTask.getWork(); - Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp(); + private static void splitTasks(ReduceSinkOperator op, + Task<? extends Serializable> parentTask, Task<? extends Serializable> childTask, + GenMRProcContext opProcCtx) throws SemanticException { + if (op.getNumParent() != 1) { + throw new IllegalStateException("Expecting operator " + op + " to have one parent. " + + "But found multiple parents: " + op.getParentOperators()); + } ParseContext parseCtx = opProcCtx.getParseCtx(); parentTask.addDependentTask(childTask); @@ -883,7 +873,7 @@ Context baseCtx = parseCtx.getContext(); String taskTmpDir = baseCtx.getMRTmpFileURI(); - Operator<? extends OperatorDesc> parent = op.getParentOperators().get(posn); + Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0); TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); @@ -925,43 +915,48 @@ childOpList = new ArrayList<Operator<? extends OperatorDesc>>(); childOpList.add(op); ts_op.setChildOperators(childOpList); - op.getParentOperators().set(posn, ts_op); + op.getParentOperators().set(0, ts_op); Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); - mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null)); + mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null)); String streamDesc = taskTmpDir; MapredWork cplan = (MapredWork) childTask.getWork(); - if (setReducer) { - Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0); + Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0); - if (reducer.getClass() == JoinOperator.class) { - String origStreamDesc; - streamDesc = "$INTNAME"; - origStreamDesc = streamDesc; - int pos = 0; - while (cplan.getAliasToWork().get(streamDesc) != null) { - streamDesc = origStreamDesc.concat(String.valueOf(++pos)); - } + if (reducer.getClass() == JoinOperator.class) { + String origStreamDesc; + streamDesc = "$INTNAME"; + origStreamDesc = streamDesc; + int pos = 0; + while (cplan.getAliasToWork().get(streamDesc) != null) { + streamDesc = origStreamDesc.concat(String.valueOf(++pos)); } // TODO: Allocate work to remove the temporary files and make that // dependent on the redTask - if (reducer.getClass() == JoinOperator.class || - reducer.getClass() == DemuxOperator.class) { - cplan.setNeedsTagging(true); - } + cplan.setNeedsTagging(true); } // Add the path to alias mapping - setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc); + setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, false, tt_desc); opProcCtx.setCurrTopOp(null); opProcCtx.setCurrAliasId(null); opProcCtx.setCurrTask(childTask); + opProcCtx.addRootIfPossible(parentTask); } + static boolean hasBranchFinished(Object...
children) { + for (Object child : children) { + if (child == null) { + return false; + } + } + return true; + } + private GenMapRedUtils() { // prevent instantiation } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java (working copy) @@ -53,8 +53,7 @@ .getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(stack.get(stack.size() - 2)); mapCurrCtx.put((Operator<? extends OperatorDesc>) nd, new GenMapRedCtx( - mapredCtx.getCurrTask(), mapredCtx.getCurrTopOp(), mapredCtx - .getCurrAliasId())); - return null; + mapredCtx.getCurrTask(), mapredCtx.getCurrAliasId())); + return true; } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (working copy) @@ -144,38 +144,16 @@ * position of the parent */ private static void initMapJoinPlan(AbstractMapJoinOperator<? extends MapJoinDesc> op, - GenMRProcContext opProcCtx, int pos) + Task<? extends Serializable> currTask, + GenMRProcContext opProcCtx, boolean local) throws SemanticException { - Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); - int parentPos = (pos == -1) ? 0 : pos; - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( - parentPos)); - Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); - MapredWork plan = (MapredWork) currTask.getWork(); - HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = - opProcCtx.getOpTaskMap(); - Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp(); - MapJoinDesc desc = (MapJoinDesc) op.getConf(); - // The map is overloaded to keep track of mapjoins also - opTaskMap.put(op, currTask); + opProcCtx.getOpTaskMap().put(op, currTask); - List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks(); - if(!rootTasks.contains(currTask) - && (currTask.getParentTasks() == null - || currTask.getParentTasks().isEmpty())) { - rootTasks.add(currTask); - } - - assert currTopOp != null; - opProcCtx.getSeenOps().add(currTopOp); - + Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp(); String currAliasId = opProcCtx.getCurrAliasId(); - boolean local = (pos == desc.getPosBigTable()) ? false : true; - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - setupBucketMapJoinInfo(plan, op); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, local, opProcCtx); } /** @@ -191,29 +169,12 @@ * @param pos * position of the parent in the stack */ - public static void joinMapJoinPlan(AbstractMapJoinOperator<? extends MapJoinDesc> op, + private static void joinMapJoinPlan(AbstractMapJoinOperator<? extends MapJoinDesc> op, Task<? extends Serializable> oldTask, - GenMRProcContext opProcCtx, int pos) + GenMRProcContext opProcCtx, boolean local) throws SemanticException { - MapredWork plan = (MapredWork) oldTask.getWork(); Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp(); - - List<Operator<? extends OperatorDesc>> seenOps = opProcCtx.getSeenOps(); - String currAliasId = opProcCtx.getCurrAliasId(); - - if (!seenOps.contains(currTopOp)) { - seenOps.add(currTopOp); - boolean local = false; - if (pos != -1) { - local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ?
false - : true; - } - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - setupBucketMapJoinInfo(plan, op); - } - currTopOp = null; - opProcCtx.setCurrTopOp(currTopOp); - opProcCtx.setCurrTask(oldTask); + GenMapRedUtils.mergeInput(currTopOp, opProcCtx, oldTask, local); } /* @@ -236,17 +197,14 @@ Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( - pos)); + GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(pos)); Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); MapredWork currPlan = (MapredWork) currTask.getWork(); - Operator<? extends OperatorDesc> currTopOp = mapredCtx.getCurrTopOp(); String currAliasId = mapredCtx.getCurrAliasId(); HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap(); - Task<? extends Serializable> opMapTask = opTaskMap.get(mapJoin); + Task<? extends Serializable> oldTask = opTaskMap.get(mapJoin); - ctx.setCurrTopOp(currTopOp); ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); @@ -254,20 +212,23 @@ // If we are seeing this mapjoin for the second or later time then at least one of the // branches for this mapjoin has been encountered. Join the plan with the plan created // the first time. - if (opMapTask == null) { + boolean local = pos != mapJoin.getConf().getPosBigTable(); + if (oldTask == null) { assert currPlan.getReducer() == null; - initMapJoinPlan(mapJoin, ctx, pos); + initMapJoinPlan(mapJoin, currTask, ctx, local); } else { // The current plan can be thrown away after being merged with the // original plan - joinMapJoinPlan(mapJoin, opMapTask, ctx, pos); - currTask = opMapTask; - ctx.setCurrTask(currTask); + joinMapJoinPlan(mapJoin, oldTask, ctx, local); + ctx.setCurrTask(currTask = oldTask); } + MapredWork plan = (MapredWork) currTask.getWork(); + setupBucketMapJoinInfo(plan, mapJoin); - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx - .getCurrTopOp(), ctx.getCurrAliasId())); - return null; + mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId())); + + // local aliases need not hand over the context any further + return !local; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy) @@ -95,7 +95,7 @@ private boolean mapperCannotSpanPartns; - // used to indicate the input is sorted, and so a BinarySearchRecordReader shoudl be used + // used to indicate the input is sorted, and so a BinarySearchRecordReader should be used private boolean inputFormatSorted = false; private transient boolean useBucketizedHiveInputFormat; @@ -630,4 +630,9 @@ return samplingType == 1 ? "SAMPLING_ON_PREV_MR" : samplingType == 2 ?
"SAMPLING_ON_START" : null; } + + public void mergingInto(MapredWork mapred) { + // currently, this is the sole field affecting the mergee task + mapred.useBucketizedHiveInputFormat |= useBucketizedHiveInputFormat; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (working copy) @@ -75,6 +75,13 @@ * @throws SemanticException */ public void dispatch(Node nd, Stack<Node> ndStack) throws SemanticException { + dispatchAndReturn(nd, ndStack); + } + + /** + * Returns the dispatch result + */ + public <T> T dispatchAndReturn(Node nd, Stack<Node> ndStack) throws SemanticException { Object[] nodeOutputs = null; if (nd.getChildren() != null) { nodeOutputs = new Object[nd.getChildren().size()]; @@ -86,6 +93,7 @@ Object retVal = dispatcher.dispatch(nd, ndStack, nodeOutputs); retMap.put(nd, retVal); + return (T) retVal; } /** Index: ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java (working copy) @@ -20,7 +20,6 @@ import java.util.List; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.Node; @@ -52,12 +51,11 @@ // maintain the stack of operators encountered opStack.push(nd); - dispatch(nd, opStack); + Boolean result = dispatchAndReturn(nd, opStack); - // kids of reduce sink operator need not be traversed again - if ((children == null) - || ((nd instanceof ReduceSinkOperator) && (getDispatchedList() .containsAll(children)))) { + // kids of reduce sink operators or mapjoin operators merged into the root task + // need not be traversed again + if (children == null || result == Boolean.FALSE) { opStack.pop(); return; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java (working copy) @@ -217,8 +217,7 @@ GenMRProcContext procCtx = new GenMRProcContext( conf, new HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>>(), - new ArrayList<Operator<? extends OperatorDesc>>(), tempParseContext, - mvTask, rootTasks, + tempParseContext, mvTask, rootTasks, new LinkedHashMap<Operator<? extends OperatorDesc>, GenMapRedCtx>(), inputs, outputs); @@ -599,8 +598,7 @@ boolean hasNonLocalJob = false; for (ExecDriver mrtask : mrtasks) { try { - ContentSummary inputSummary = Utilities.getInputSummary (ctx, (MapredWork) mrtask.getWork(), p); + ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork(), p); int numReducers = getNumberOfReducers(mrtask.getWork(), conf); long estimatedInput; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (revision 1505233) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (working copy) @@ -86,6 +86,7 @@ protected String id; protected T work; + public static enum FeedType { DYNAMIC_PARTITIONS, // list of dynamic partitions };