Index: data/files/smbbucket_4.txt =================================================================== --- data/files/smbbucket_4.txt (revision 0) +++ data/files/smbbucket_4.txt (revision 0) @@ -0,0 +1,3 @@ +4val_356 +2000val_169 +4000val_125 Index: data/files/smbbucket_5.txt =================================================================== --- data/files/smbbucket_5.txt (revision 0) +++ data/files/smbbucket_5.txt (revision 0) @@ -0,0 +1,3 @@ +484val_169 +3000val_169 +5000val_125 Index: ql/src/test/results/clientpositive/smb_mapjoin_8.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_8.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_8.q.out (revision 0) @@ -0,0 +1,302 @@ +PREHOOK: query: drop table smb_bucket4_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table smb_bucket4_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_bucket4_1 +PREHOOK: query: drop table smb_bucket4_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table smb_bucket4_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_bucket4_2 +PREHOOK: query: drop table smb_join_results +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table smb_join_results +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table normal_join_results +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table normal_join_results +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table smb_join_results(k1 int, v1 string, k2 int, v2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table smb_join_results(k1 int, v1 string, k2 int, v2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_join_results +PREHOOK: query: create table normal_join_results(k1 int, v1 string, k2 int, v2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table normal_join_results(k1 int, v1 string, k2 int, v2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@normal_join_results +PREHOOK: query: load data local inpath '../data/files/smbbucket_4.txt' into table smb_bucket4_1 +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/smbbucket_4.txt' into table smb_bucket4_1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_bucket4_1 +PREHOOK: query: load data local inpath '../data/files/smbbucket_5.txt' into table smb_bucket4_2 +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/smbbucket_5.txt' into table smb_bucket4_2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_bucket4_2 +PREHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF smb_bucket4_1 a) (TOK_TABREF smb_bucket4_2 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB smb_join_results)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col2, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: smb_join_results + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-33_275_643756030781304008/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: smb_join_results + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-33_275_643756030781304008/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: k1 + type: int + expr: v1 + type: string + expr: k2 + type: int + expr: v2 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: smb_join_results + + +PREHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Output: default@smb_join_results +POSTHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from smb_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-38_505_4786986196967423643/10000 +POSTHOOK: query: select * from smb_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-38_505_4786986196967423643/10000 +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +4 val_356 NULL NULL +NULL NULL 484 val_169 +2000 val_169 NULL NULL +NULL NULL 3000 val_169 +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Output: default@normal_join_results +POSTHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Output: default@normal_join_results +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@normal_join_results +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-42_843_5593391721632485551/10000 +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@normal_join_results +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-42_843_5593391721632485551/10000 +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +6004 8484 692698274 692696386 +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-47_081_1369810508533393772/10000 +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Tools/build/ql/scratchdir/hive_2010-05-24_14-55-47_081_1369810508533393772/10000 +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +6004 8484 692698274 692696386 +PREHOOK: query: drop table smb_join_results +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table smb_join_results +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: drop table normal_join_results +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table normal_join_results +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@normal_join_results +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: drop table smb_bucket4_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table smb_bucket4_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: drop table smb_bucket4_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table smb_bucket4_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] Index: ql/src/test/queries/clientpositive/smb_mapjoin_8.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_8.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_8.q (revision 0) @@ -0,0 +1,39 @@ +set hive.enforce.bucketing = true; +set hive.enforce.sorting = true; +set hive.exec.reducers.max = 1; + +drop table smb_bucket4_1; +CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS; + +drop table smb_bucket4_2; +CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS; + +drop table smb_join_results; +drop table normal_join_results; +create table smb_join_results(k1 int, v1 string, k2 int, v2 string); +create table normal_join_results(k1 int, v1 string, k2 int, v2 string); + +load data local inpath '../data/files/smbbucket_4.txt' into table smb_bucket4_1; +load data local inpath '../data/files/smbbucket_5.txt' into table smb_bucket4_2; + +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; + +explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key; + +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key; + +select * from smb_join_results; + +insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key; + +select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results; +select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results; + +drop table smb_join_results; +drop table normal_join_results; +drop table smb_bucket4_1; +drop table smb_bucket4_2; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (revision 947793) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (working copy) @@ -229,17 +229,20 @@ bigTblRowContainer = this.candidateStorage.get((byte)this.posBigTable); allFetchOpDone = allFetchOpDone(); } - + if (allFetchOpDone && this.candidateStorage.get((byte) this.posBigTable).size() > 0) { // if all fetch operator for small tables are done and there are data left // in big table - for (byte t : order) { - if(this.foundNextKeyGroup.get(t) && this.nextKeyWritables.get(t) != null) { - promoteNextGroupToCandidate(t); + while(this.candidateStorage.get((byte) this.posBigTable).size() > 0) { + for (byte t : order) { + if((t != (byte) this.posBigTable) && this.foundNextKeyGroup.get(t) + && this.nextKeyWritables.get(t) != null) { + promoteNextGroupToCandidate(t); + } } + joinOneGroup(); } - joinOneGroup(); } else { while (!allFetchOpDone) { List ret = joinOneGroup();