Index: data/files/in.txt =================================================================== --- data/files/in.txt (revision 0) +++ data/files/in.txt (revision 0) @@ -0,0 +1,3 @@ +35 +48 +100100 Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (revision 985813) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.LongWritable; @@ -86,6 +87,10 @@ // number of rows for the key in the given table int sz = storage.get(alias).size(); + StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; + StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY + .toString()); + Object keyObject = soi.getStructFieldData(row, sf); // Are we consuming too much memory if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) { @@ -105,10 +110,6 @@ // operand // We won't output a warning for the last join operand since the size // will never goes to joinEmitInterval. - StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; - StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY - .toString()); - Object keyObject = soi.getStructFieldData(row, sf); LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject); nextSz = getNextSize(nextSz); @@ -117,6 +118,11 @@ // Add the value to the vector storage.get(alias).add(nr); + // if join-key is null, process each row in different group. + if (SerDeUtils.isNullObject(keyObject, sf.getFieldObjectInspector())) { + endGroup(); + startGroup(); + } } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 985813) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy) @@ -282,7 +282,8 @@ MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); MapJoinObjectValue o = mapJoinTables.get(pos).get(keyMap); - if (o == null) { + // there is no join-value or join-key has null elements + if (o == null || (hasNullElements(key))) { if (noOuterJoin) { storage.put(pos, emptyList); } else { @@ -312,6 +313,19 @@ } } + // returns true if there are elements in key list and all of them are null + private boolean hasNullElements(ArrayList key) { + if (key != null && key.size() > 0) { + for (Object k : key) { + if (k != null) { + return false; + } + } + return true; + } + return false; + } + @Override public void closeOp(boolean abort) throws HiveException { if(mapJoinTables != null) { Index: ql/src/test/queries/clientpositive/join_nulls.q =================================================================== --- ql/src/test/queries/clientpositive/join_nulls.q (revision 0) +++ ql/src/test/queries/clientpositive/join_nulls.q (revision 0) @@ -0,0 +1,39 @@ +DROP TABLE myinput1; +CREATE TABLE myinput1(key int, value int); +LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1; + +SELECT * FROM myinput1 a JOIN myinput1 b; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value; + +SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value; + +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; + +DROP TABLE myinput1; Index: ql/src/test/results/clientpositive/join_nulls.q.out =================================================================== --- ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) +++ ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) @@ -0,0 +1,376 @@ +PREHOOK: query: DROP TABLE myinput1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE myinput1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@myinput1 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-40_862_5169489412953976050/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-40_862_5169489412953976050/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-43_430_1882134103866507523/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-43_430_1882134103866507523/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-45_786_4065276775173582703/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-45_786_4065276775173582703/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-48_221_3299213742743622538/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-48_221_3299213742743622538/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-50_609_97341509577214090/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-50_609_97341509577214090/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-53_002_5665479026816559214/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-53_002_5665479026816559214/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-55_320_3336438430398535358/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-55_320_3336438430398535358/-mr-10000 +NULL 35 NULL NULL +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-57_617_7917918040625103194/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-31-57_617_7917918040625103194/-mr-10000 +48 NULL NULL NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-00_034_7006990238691305372/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-00_034_7006990238691305372/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-02_267_5669355492198657157/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-02_267_5669355492198657157/-mr-10000 +NULL NULL 48 NULL +NULL NULL NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-04_513_2247832143675591782/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-04_513_2247832143675591782/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-06_905_1660668133562052487/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-06_905_1660668133562052487/-mr-10000 +NULL NULL 48 NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-09_307_396548105345204393/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-09_307_396548105345204393/-mr-10000 +NULL 35 NULL NULL +NULL NULL 48 NULL +NULL NULL NULL 35 +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-11_631_9201789213688577290/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-11_631_9201789213688577290/-mr-10000 +NULL 35 NULL NULL +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-13_983_1372711602652539964/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-13_983_1372711602652539964/-mr-10000 +48 NULL NULL NULL +NULL NULL 48 NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-16_299_2268746528450525777/-mr-10000 +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-16_299_2268746528450525777/-mr-10000 +NULL NULL NULL NULL 48 NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-18_657_7997393866041220170/-mr-10000 +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-18_657_7997393866041220170/-mr-10000 +NULL NULL 48 NULL NULL NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-21_018_8366426722827397107/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-21_018_8366426722827397107/-mr-10000 +NULL NULL NULL NULL 48 NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-25_611_692780866301455822/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-25_611_692780866301455822/-mr-10000 +NULL 35 NULL 35 +48 NULL NULL 35 +100 100 NULL 35 +NULL 35 48 NULL +48 NULL 48 NULL +100 100 48 NULL +NULL 35 100 100 +48 NULL 100 100 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-27_894_8138699055069926311/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-27_894_8138699055069926311/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-30_110_6442738193740947901/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-30_110_6442738193740947901/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-32_317_5383596540235888020/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-32_317_5383596540235888020/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-34_514_1405750666092125282/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-34_514_1405750666092125282/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-36_760_9019938555543221173/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-36_760_9019938555543221173/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-38_987_4730755161024316448/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-38_987_4730755161024316448/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-41_224_2300399187712071530/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-41_224_2300399187712071530/-mr-10000 +NULL 35 NULL NULL +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-43_458_1178119106678444213/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-43_458_1178119106678444213/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-45_683_2166629956776905841/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-45_683_2166629956776905841/-mr-10000 +NULL 35 NULL 35 +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-47_883_4244514772255991833/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-47_883_4244514772255991833/-mr-10000 +NULL NULL NULL 35 +NULL NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-50_106_3320497208932853297/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-50_106_3320497208932853297/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-52_427_6815722267553266564/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_01-32-52_427_6815722267553266564/-mr-10000 +NULL 35 NULL 35 +NULL NULL 48 NULL +100 100 100 100 +PREHOOK: query: DROP TABLE myinput1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE myinput1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@myinput1 Index: serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (revision 985813) +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (working copy) @@ -331,6 +331,92 @@ } } + /** + * True if Object passed is representing null object. + * + * @param o The object + * @param oi The ObjectInspector + * + * @return true if the object passed is representing NULL object + * false otherwise + */ + public static boolean isNullObject(Object o, ObjectInspector oi) { + switch (oi.getCategory()) { + case PRIMITIVE: { + if (o == null) { + return true; + } + return false; + } + case LIST: { + ListObjectInspector loi = (ListObjectInspector) oi; + ObjectInspector listElementObjectInspector = loi + .getListElementObjectInspector(); + List olist = loi.getList(o); + if (olist == null) { + return true; + } else { + // there are no elements in the list + if (olist.size() == 0) { + return false; + } + // if all the elements are representing null, then return true + for (int i = 0; i < olist.size(); i++) { + if (!isNullObject(olist.get(i), listElementObjectInspector)) { + return false; + } + } + return false; + } + } + case MAP: { + MapObjectInspector moi = (MapObjectInspector) oi; + ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); + ObjectInspector mapValueObjectInspector = moi + .getMapValueObjectInspector(); + Map omap = moi.getMap(o); + if (omap == null) { + return true; + } else { + // there are no elements in the map + if (omap.entrySet().size() == 0) { + return false; + } + // if all the entries of map are representing null, then return true + for (Map.Entry entry : omap.entrySet()) { + if (!isNullObject(entry.getKey(), mapKeyObjectInspector) + || !isNullObject(entry.getValue(), mapValueObjectInspector)) { + return false; + } + } + return true; + } + } + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector) oi; + List structFields = soi.getAllStructFieldRefs(); + if (o == null) { + return true; + } else { + // there are no fields in the struct + if (structFields.size() == 0) { + return false; + } + // if all the fields of struct are representing null, then return true + for (int i = 0; i < structFields.size(); i++) { + if (!isNullObject(soi.getStructFieldData(o, structFields.get(i)), + structFields.get(i).getFieldObjectInspector())) { + return false; + } + } + return true; + } + } + default: + throw new RuntimeException("Unknown type in ObjectInspector!"); + } + } + private SerDeUtils() { // prevent instantiation }