Index: data/files/in.txt =================================================================== --- data/files/in.txt (revision 0) +++ data/files/in.txt (revision 0) @@ -0,0 +1,3 @@ +356 +484 +1010 \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (revision 985813) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (working copy) @@ -117,6 +117,11 @@ // Add the value to the vector storage.get(alias).add(nr); + // if join-key is null, process each row in different group. + if (nr == null || nr.size() == 0 || nr.get(0) == null) { + endGroup(); + startGroup(); + } } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 985813) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy) @@ -282,7 +282,9 @@ MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); MapJoinObjectValue o = mapJoinTables.get(pos).get(keyMap); - if (o == null) { + // there is no join-value or join-key is null + if (o == null || + (key == null || key.size() == 0 || key.get(0) == null)) { if (noOuterJoin) { storage.put(pos, emptyList); } else { Index: ql/src/test/queries/clientpositive/join_nulls.q =================================================================== --- ql/src/test/queries/clientpositive/join_nulls.q (revision 0) +++ ql/src/test/queries/clientpositive/join_nulls.q (revision 0) @@ -0,0 +1,13 @@ +drop table myinput1; +create table myinput1(key int, value int); +LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1; + +select * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +select * FROM myinput1 a left outer JOIN myinput1 b ON a.key = b.value; +select * FROM myinput1 a right outer JOIN myinput1 b ON a.key = b.value; +select * FROM myinput1 a full outer JOIN myinput1 b ON a.key = b.value; +select /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +select /*+ MAPJOIN(b) */ * FROM myinput1 a left outer JOIN myinput1 b ON a.key = b.value; +select /*+ MAPJOIN(a) */ * FROM myinput1 a right outer JOIN myinput1 b ON a.key = b.value; + +drop table myinput1; Index: ql/src/test/results/clientpositive/join_nulls.q.out =================================================================== --- ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) +++ ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) @@ -0,0 +1,94 @@ +PREHOOK: query: drop table myinput1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table myinput1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table myinput1(key int, value int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@myinput1 +PREHOOK: query: select * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-23_880_4545760163166530272/-mr-10000 +POSTHOOK: query: select * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-23_880_4545760163166530272/-mr-10000 +10 10 10 10 +PREHOOK: query: select * FROM myinput1 a left outer JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-27_170_6373292726803257879/-mr-10000 +POSTHOOK: query: select * FROM myinput1 a left outer JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-27_170_6373292726803257879/-mr-10000 +NULL 356 NULL NULL +10 10 10 10 +484 NULL NULL NULL +PREHOOK: query: select * FROM myinput1 a right outer JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-29_606_4701376975218841825/-mr-10000 +POSTHOOK: query: select * FROM myinput1 a right outer JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-29_606_4701376975218841825/-mr-10000 +NULL NULL 484 NULL +10 10 10 10 +NULL NULL NULL 356 +PREHOOK: query: select * FROM myinput1 a full outer JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-31_936_6367955939204339006/-mr-10000 +POSTHOOK: query: select * FROM myinput1 a full outer JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-31_936_6367955939204339006/-mr-10000 +NULL 356 NULL NULL +NULL NULL 484 NULL +10 10 10 10 +NULL NULL NULL 356 +484 NULL NULL NULL +PREHOOK: query: select /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-34_348_7227459679424217180/-mr-10000 +POSTHOOK: query: select /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-34_348_7227459679424217180/-mr-10000 +10 10 10 10 +PREHOOK: query: select /*+ MAPJOIN(b) */ * FROM myinput1 a left outer JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-36_856_23966332066305275/-mr-10000 +POSTHOOK: query: select /*+ MAPJOIN(b) */ * FROM myinput1 a left outer JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-36_856_23966332066305275/-mr-10000 +NULL 356 NULL NULL +484 NULL NULL NULL +10 10 10 10 +PREHOOK: query: select /*+ MAPJOIN(a) */ * FROM myinput1 a right outer JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-39_131_3446574342692296755/-mr-10000 +POSTHOOK: query: select /*+ MAPJOIN(a) */ * FROM myinput1 a right outer JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-16_04-02-39_131_3446574342692296755/-mr-10000 +NULL NULL NULL 356 +NULL NULL 484 NULL +10 10 10 10 +PREHOOK: query: drop table myinput1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table myinput1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@myinput1