Index: data/files/in.txt =================================================================== --- data/files/in.txt (revision 0) +++ data/files/in.txt (revision 0) @@ -0,0 +1,3 @@ +35 +48 +100100 Index: ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (revision 987014) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (working copy) @@ -130,4 +130,18 @@ public int getType() { return OperatorType.MAPJOIN; } + + // returns true if there are elements in key list and all of them are null + protected boolean hasNullElements(ArrayList key) { + if (key != null && key.size() > 0) { + for (Object k : key) { + if (k != null) { + return false; + } + } + return true; + } + return false; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (revision 987014) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.LongWritable; @@ -86,6 +87,10 @@ // number of rows for the key in the given table int sz = storage.get(alias).size(); + StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; + StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY + .toString()); + Object keyObject = soi.getStructFieldData(row, sf); // Are we consuming too much memory if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) { @@ -105,10 +110,6 @@ // operand // We won't output a warning for the last join operand since the size // will never goes to joinEmitInterval. - StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; - StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY - .toString()); - Object keyObject = soi.getStructFieldData(row, sf); LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject); nextSz = getNextSize(nextSz); @@ -117,6 +118,11 @@ // Add the value to the vector storage.get(alias).add(nr); + // if join-key is null, process each row in different group. + if (SerDeUtils.isNullObject(keyObject, sf.getFieldObjectInspector())) { + endGroup(); + startGroup(); + } } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 987014) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy) @@ -282,7 +282,8 @@ MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); MapJoinObjectValue o = mapJoinTables.get(pos).get(keyMap); - if (o == null) { + // there is no join-value or join-key has null elements + if (o == null || (hasNullElements(key))) { if (noOuterJoin) { storage.put(pos, emptyList); } else { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (revision 987014) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (working copy) @@ -383,6 +383,13 @@ private int compareKeys (ArrayList k1, ArrayList k2) { int ret = 0; + if (hasNullElements(k1) && hasNullElements(k2)) { + return -1; // just return k1 is smaller than k2 + } else if (hasNullElements(k1)) { + return (0 - k2.size()); + } else if (hasNullElements(k2)) { + return k1.size(); + } for (int i = 0; i < k1.size() && i < k1.size(); i++) { WritableComparable key_1 = (WritableComparable) k1.get(i); WritableComparable key_2 = (WritableComparable) k2.get(i); Index: ql/src/test/queries/clientpositive/join_nulls.q =================================================================== --- ql/src/test/queries/clientpositive/join_nulls.q (revision 0) +++ ql/src/test/queries/clientpositive/join_nulls.q (revision 0) @@ -0,0 +1,53 @@ +DROP TABLE myinput1; +DROP TABLE smb_input; + +CREATE TABLE myinput1(key int, value int); +LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1; + +SELECT * FROM myinput1 a JOIN myinput1 b; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value; + +SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value; + +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; + +CREATE TABLE smb_input(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS; +LOAD DATA LOCAL INPATH '../data/files/in.txt' overwrite into table smb_input; + +SET hive.optimize.bucketmapJOIN = true; +SET hive.optimize.bucketmapJOIN.sortedmerge = true; +SELECT /*+ MAPJOIN(a) */ * FROM smb_input a JOIN smb_input b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input a JOIN smb_input b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input a LEFT OUTER JOIN smb_input b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM smb_input a RIGHT OUTER JOIN smb_input b ON a.key = b.key; + +DROP TABLE smb_input; + +DROP TABLE myinput1; Index: ql/src/test/results/clientpositive/join_nulls.q.out =================================================================== --- ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) +++ ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) @@ -0,0 +1,443 @@ +PREHOOK: query: DROP TABLE myinput1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE myinput1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE smb_input +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE smb_input +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@myinput1 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-17_308_5838446930686678374/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-17_308_5838446930686678374/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-20_120_8341322577551177695/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-20_120_8341322577551177695/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-22_489_3241304308426318403/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-22_489_3241304308426318403/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-24_820_7199671849344508928/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-24_820_7199671849344508928/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-27_183_8434366744617380391/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-27_183_8434366744617380391/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-29_513_6116839905212142427/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-29_513_6116839905212142427/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-31_765_3462938055596116967/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-31_765_3462938055596116967/-mr-10000 +NULL 35 NULL NULL +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-34_210_6076716565061326725/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-34_210_6076716565061326725/-mr-10000 +48 NULL NULL NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-36_569_8676716464652964538/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-36_569_8676716464652964538/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-38_860_7002153762694404288/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-38_860_7002153762694404288/-mr-10000 +NULL NULL 48 NULL +NULL NULL NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-41_110_931812807512516264/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-41_110_931812807512516264/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-43_454_4815093140147723088/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-43_454_4815093140147723088/-mr-10000 +NULL NULL 48 NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-45_990_6689172195814174497/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-45_990_6689172195814174497/-mr-10000 +NULL 35 NULL NULL +NULL NULL 48 NULL +NULL NULL NULL 35 +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-48_201_2736908106420866223/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-48_201_2736908106420866223/-mr-10000 +NULL 35 NULL NULL +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-50_710_7863961914381161328/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-50_710_7863961914381161328/-mr-10000 +48 NULL NULL NULL +NULL NULL 48 NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-52_948_8760837671586780264/-mr-10000 +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-52_948_8760837671586780264/-mr-10000 +NULL NULL NULL NULL 48 NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-55_297_4901339755544652671/-mr-10000 +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-55_297_4901339755544652671/-mr-10000 +NULL NULL 48 NULL NULL NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-57_630_52571977864060430/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-56-57_630_52571977864060430/-mr-10000 +NULL NULL NULL NULL 48 NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-02_162_829635343848198940/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-02_162_829635343848198940/-mr-10000 +NULL 35 NULL 35 +48 NULL NULL 35 +100 100 NULL 35 +NULL 35 48 NULL +48 NULL 48 NULL +100 100 48 NULL +NULL 35 100 100 +48 NULL 100 100 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-04_502_7283369972367481412/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-04_502_7283369972367481412/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-06_723_2441647202253635548/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-06_723_2441647202253635548/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-08_913_8813191266340261817/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-08_913_8813191266340261817/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-11_140_6631622191046212053/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-11_140_6631622191046212053/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-13_385_4478603738153052704/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-13_385_4478603738153052704/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-15_585_8505343119921931380/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-15_585_8505343119921931380/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-17_814_6457816032959343000/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-17_814_6457816032959343000/-mr-10000 +NULL 35 NULL NULL +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-20_019_2319552844762862049/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-20_019_2319552844762862049/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-22_269_24703276830992887/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-22_269_24703276830992887/-mr-10000 +NULL 35 NULL 35 +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-24_496_4979664935963275493/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-24_496_4979664935963275493/-mr-10000 +NULL NULL NULL 35 +NULL NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-26_739_6456362547568760541/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-26_739_6456362547568760541/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-28_986_5544469979742183325/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-28_986_5544469979742183325/-mr-10000 +NULL 35 NULL 35 +NULL NULL 48 NULL +100 100 100 100 +PREHOOK: query: CREATE TABLE smb_input(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE smb_input(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_input +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' overwrite into table smb_input +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in.txt' overwrite into table smb_input +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_input +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input a JOIN smb_input b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-31_419_6052590644502816067/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input a JOIN smb_input b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-31_419_6052590644502816067/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input a JOIN smb_input b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-33_650_2395223139450977472/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input a JOIN smb_input b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-33_650_2395223139450977472/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input a LEFT OUTER JOIN smb_input b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-35_985_1930492278435765050/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input a LEFT OUTER JOIN smb_input b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-35_985_1930492278435765050/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input a RIGHT OUTER JOIN smb_input b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-38_232_3301032456768023498/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input a RIGHT OUTER JOIN smb_input b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-18_21-57-38_232_3301032456768023498/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: DROP TABLE smb_input +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@smb_input +PREHOOK: Output: default@smb_input +POSTHOOK: query: DROP TABLE smb_input +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@smb_input +POSTHOOK: Output: default@smb_input +PREHOOK: query: DROP TABLE myinput1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@myinput1 +PREHOOK: Output: default@myinput1 +POSTHOOK: query: DROP TABLE myinput1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: default@myinput1 Index: serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (revision 987014) +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (working copy) @@ -331,6 +331,92 @@ } } + /** + * True if Object passed is representing null object. + * + * @param o The object + * @param oi The ObjectInspector + * + * @return true if the object passed is representing NULL object + * false otherwise + */ + public static boolean isNullObject(Object o, ObjectInspector oi) { + switch (oi.getCategory()) { + case PRIMITIVE: { + if (o == null) { + return true; + } + return false; + } + case LIST: { + ListObjectInspector loi = (ListObjectInspector) oi; + ObjectInspector listElementObjectInspector = loi + .getListElementObjectInspector(); + List olist = loi.getList(o); + if (olist == null) { + return true; + } else { + // there are no elements in the list + if (olist.size() == 0) { + return false; + } + // if all the elements are representing null, then return true + for (int i = 0; i < olist.size(); i++) { + if (!isNullObject(olist.get(i), listElementObjectInspector)) { + return false; + } + } + return false; + } + } + case MAP: { + MapObjectInspector moi = (MapObjectInspector) oi; + ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); + ObjectInspector mapValueObjectInspector = moi + .getMapValueObjectInspector(); + Map omap = moi.getMap(o); + if (omap == null) { + return true; + } else { + // there are no elements in the map + if (omap.entrySet().size() == 0) { + return false; + } + // if all the entries of map are representing null, then return true + for (Map.Entry entry : omap.entrySet()) { + if (!isNullObject(entry.getKey(), mapKeyObjectInspector) + || !isNullObject(entry.getValue(), mapValueObjectInspector)) { + return false; + } + } + return true; + } + } + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector) oi; + List structFields = soi.getAllStructFieldRefs(); + if (o == null) { + return true; + } else { + // there are no fields in the struct + if (structFields.size() == 0) { + return false; + } + // if all the fields of struct are representing null, then return true + for (int i = 0; i < structFields.size(); i++) { + if (!isNullObject(soi.getStructFieldData(o, structFields.get(i)), + structFields.get(i).getFieldObjectInspector())) { + return false; + } + } + return true; + } + } + default: + throw new RuntimeException("Unknown type in ObjectInspector!"); + } + } + private SerDeUtils() { // prevent instantiation }