Index: data/files/in1.txt =================================================================== --- data/files/in1.txt (revision 0) +++ data/files/in1.txt (revision 0) @@ -0,0 +1,3 @@ +35 +48 +100100 Index: data/files/in2.txt =================================================================== --- data/files/in2.txt (revision 0) +++ data/files/in2.txt (revision 0) @@ -0,0 +1,3 @@ +135 +148 +200200 Index: ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (revision 987986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (working copy) @@ -130,4 +130,18 @@ public int getType() { return OperatorType.MAPJOIN; } + + // returns true if there are elements in key list and all of them are null + protected boolean hasNullElements(ArrayList key) { + if (key != null && key.size() > 0) { + for (Object k : key) { + if (k != null) { + return false; + } + } + return true; + } + return false; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (revision 987986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.LongWritable; @@ -86,6 +87,10 @@ // number of rows for the key in the given table int sz = storage.get(alias).size(); + StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; + StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY + .toString()); + Object keyObject = soi.getStructFieldData(row, sf); // Are we consuming too much memory if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) { @@ -105,10 +110,6 @@ // operand // We won't output a warning for the last join operand since the size // will never goes to joinEmitInterval. - StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; - StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY - .toString()); - Object keyObject = soi.getStructFieldData(row, sf); LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject); nextSz = getNextSize(nextSz); @@ -117,6 +118,11 @@ // Add the value to the vector storage.get(alias).add(nr); + // if join-key is null, process each row in different group. + if (SerDeUtils.isNullObject(keyObject, sf.getFieldObjectInspector())) { + endGroup(); + startGroup(); + } } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 987986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy) @@ -282,7 +282,8 @@ MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); MapJoinObjectValue o = mapJoinTables.get(pos).get(keyMap); - if (o == null) { + // there is no join-value or join-key has null elements + if (o == null || (hasNullElements(key))) { if (noOuterJoin) { storage.put(pos, emptyList); } else { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (revision 987986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (working copy) @@ -383,6 +383,15 @@ private int compareKeys (ArrayList k1, ArrayList k2) { int ret = 0; + boolean k1HasNulls = hasNullElements(k1); + boolean k2HasNulls = hasNullElements(k2); + if (k1HasNulls && k2HasNulls) { + return -1; // just return k1 is smaller than k2 + } else if (k1HasNulls) { + return (0 - k2.size()); + } else if (k2HasNulls) { + return k1.size(); + } for (int i = 0; i < k1.size() && i < k1.size(); i++) { WritableComparable key_1 = (WritableComparable) k1.get(i); WritableComparable key_2 = (WritableComparable) k2.get(i); Index: ql/src/test/queries/clientpositive/join_nulls.q =================================================================== --- ql/src/test/queries/clientpositive/join_nulls.q (revision 0) +++ ql/src/test/queries/clientpositive/join_nulls.q (revision 0) @@ -0,0 +1,70 @@ +DROP TABLE myinput1; +DROP TABLE smb_input1; +DROP TABLE smb_input2; + +CREATE TABLE myinput1(key int, value int); +LOAD DATA LOCAL INPATH '../data/files/in1.txt' INTO TABLE myinput1; + +SELECT * FROM myinput1 a JOIN myinput1 b; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key; +SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value; + +SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value; + +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; + +CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; +LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input1; +LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input1; +LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input2; +LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input2; + +SET hive.optimize.bucketmapJOIN = true; +SET hive.optimize.bucketmapJOIN.sortedmerge = true; +SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; + +SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key; + +SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value; + +SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value; +SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value; +SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value; + +DROP TABLE smb_input2; +DROP TABLE smb_input1; +DROP TABLE myinput1; Index: ql/src/test/results/clientpositive/join_nulls.q.out =================================================================== --- ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) +++ ql/src/test/results/clientpositive/join_nulls.q.out (revision 0) @@ -0,0 +1,593 @@ +PREHOOK: query: DROP TABLE myinput1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE myinput1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE smb_input1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE smb_input1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE smb_input2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE smb_input2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@myinput1 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-03_280_8046041210976652930/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-03_280_8046041210976652930/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-05_875_5517964883460325329/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-05_875_5517964883460325329/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-08_180_652283667841771175/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-08_180_652283667841771175/-mr-10000 +NULL 35 NULL 35 +NULL 35 48 NULL +NULL 35 100 100 +48 NULL NULL 35 +48 NULL 48 NULL +48 NULL 100 100 +100 100 NULL 35 +100 100 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-10_550_331632076666868103/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-10_550_331632076666868103/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-12_983_1914478148736644860/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-12_983_1914478148736644860/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-15_312_4969150263861238554/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-15_312_4969150263861238554/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-17_911_8467865775268581329/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-17_911_8467865775268581329/-mr-10000 +NULL 35 NULL NULL +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-20_402_7798694547101467144/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-20_402_7798694547101467144/-mr-10000 +48 NULL NULL NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-22_735_2984219203211963003/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-22_735_2984219203211963003/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-25_265_9051318940646862682/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-25_265_9051318940646862682/-mr-10000 +NULL NULL 48 NULL +NULL NULL NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-27_561_4116994049353390978/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-27_561_4116994049353390978/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-29_821_1879945304650943837/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-29_821_1879945304650943837/-mr-10000 +NULL NULL 48 NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-32_128_4027110272276947423/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-32_128_4027110272276947423/-mr-10000 +NULL 35 NULL NULL +NULL NULL 48 NULL +NULL NULL NULL 35 +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-34_460_6844117156381771664/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-34_460_6844117156381771664/-mr-10000 +NULL 35 NULL NULL +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-36_726_8512052125040288540/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-36_726_8512052125040288540/-mr-10000 +48 NULL NULL NULL +NULL NULL 48 NULL +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-38_978_4695378175534485685/-mr-10000 +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-38_978_4695378175534485685/-mr-10000 +NULL NULL NULL NULL 48 NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-41_249_4146080071354453696/-mr-10000 +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-41_249_4146080071354453696/-mr-10000 +NULL NULL 48 NULL NULL NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-43_499_2981042407379688791/-mr-10000 +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-43_499_2981042407379688791/-mr-10000 +NULL NULL NULL NULL 48 NULL +NULL 35 NULL 35 NULL 35 +100 100 100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-47_968_6102142618328136353/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-47_968_6102142618328136353/-mr-10000 +NULL 35 NULL 35 +48 NULL NULL 35 +100 100 NULL 35 +NULL 35 48 NULL +48 NULL 48 NULL +100 100 48 NULL +NULL 35 100 100 +48 NULL 100 100 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-50_296_7390997343422021290/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-50_296_7390997343422021290/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-52_489_4221573496502023530/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-52_489_4221573496502023530/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-54_723_6968223462495586790/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-54_723_6968223462495586790/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-56_949_3964977709025870428/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-56_949_3964977709025870428/-mr-10000 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-59_182_7413184504353487219/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-28-59_182_7413184504353487219/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-01_422_1480771885956854218/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-01_422_1480771885956854218/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-03_664_4310158296018837643/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-03_664_4310158296018837643/-mr-10000 +NULL 35 NULL NULL +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-05_881_8801239321458133315/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-05_881_8801239321458133315/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-08_117_7502164153286310808/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-08_117_7502164153286310808/-mr-10000 +NULL 35 NULL 35 +48 NULL NULL NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-10_346_5197666713254635791/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-10_346_5197666713254635791/-mr-10000 +NULL NULL NULL 35 +NULL NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-12_548_4859462779237074856/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-12_548_4859462779237074856/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-14_746_1347229083562992298/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-14_746_1347229083562992298/-mr-10000 +NULL 35 NULL 35 +NULL NULL 48 NULL +100 100 100 100 +PREHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input1 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input1 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input2 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input2 +PREHOOK: type: LOAD +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-17_664_2661868080479438636/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-17_664_2661868080479438636/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +148 NULL 148 NULL +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-19_908_386499455243025916/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-19_908_386499455243025916/-mr-10000 +NULL NULL NULL 35 +48 NULL 48 NULL +100 100 100 100 +NULL NULL NULL 135 +148 NULL 148 NULL +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-22_137_7242908150853843685/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-22_137_7242908150853843685/-mr-10000 +48 NULL 48 NULL +100 100 100 100 +148 NULL 148 NULL +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-24_373_7779562609078244748/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-24_373_7779562609078244748/-mr-10000 +NULL 35 NULL NULL +48 NULL 48 NULL +100 100 100 100 +NULL 135 NULL NULL +148 NULL 148 NULL +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-26_570_2646766285118940058/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-26_570_2646766285118940058/-mr-10000 +100 100 100 100 +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-28_833_599146201019338170/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-28_833_599146201019338170/-mr-10000 +100 100 100 100 +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-31_084_8098260038026761283/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-31_084_8098260038026761283/-mr-10000 +NULL 35 NULL NULL +48 NULL NULL NULL +100 100 100 100 +NULL 135 NULL NULL +148 NULL NULL NULL +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-33_309_2716249447123267045/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-33_309_2716249447123267045/-mr-10000 +NULL NULL NULL 35 +NULL NULL 48 NULL +100 100 100 100 +NULL NULL NULL 135 +NULL NULL 148 NULL +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-35_549_6697798798919520805/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-35_549_6697798798919520805/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +NULL 135 NULL 135 +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-37_937_8090467934016706988/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-37_937_8090467934016706988/-mr-10000 +NULL 35 NULL 35 +NULL NULL 48 NULL +100 100 100 100 +NULL 135 NULL 135 +NULL NULL 148 NULL +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-40_210_3824687609882363536/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-40_210_3824687609882363536/-mr-10000 +NULL 35 NULL 35 +100 100 100 100 +NULL 135 NULL 135 +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-42_436_6357789381809664907/-mr-10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-22_21-29-42_436_6357789381809664907/-mr-10000 +NULL 35 NULL 35 +48 NULL NULL NULL +100 100 100 100 +NULL 135 NULL 135 +148 NULL NULL NULL +200 200 200 200 +PREHOOK: query: DROP TABLE smb_input2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@smb_input2 +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: DROP TABLE smb_input2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@smb_input2 +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: DROP TABLE smb_input1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@smb_input1 +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: DROP TABLE smb_input1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: DROP TABLE myinput1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@myinput1 +PREHOOK: Output: default@myinput1 +POSTHOOK: query: DROP TABLE myinput1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@myinput1 +POSTHOOK: Output: default@myinput1 Index: serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (revision 987986) +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (working copy) @@ -331,6 +331,92 @@ } } + /** + * True if Object passed is representing null object. + * + * @param o The object + * @param oi The ObjectInspector + * + * @return true if the object passed is representing NULL object + * false otherwise + */ + public static boolean isNullObject(Object o, ObjectInspector oi) { + switch (oi.getCategory()) { + case PRIMITIVE: { + if (o == null) { + return true; + } + return false; + } + case LIST: { + ListObjectInspector loi = (ListObjectInspector) oi; + ObjectInspector listElementObjectInspector = loi + .getListElementObjectInspector(); + List olist = loi.getList(o); + if (olist == null) { + return true; + } else { + // there are no elements in the list + if (olist.size() == 0) { + return false; + } + // if all the elements are representing null, then return true + for (int i = 0; i < olist.size(); i++) { + if (!isNullObject(olist.get(i), listElementObjectInspector)) { + return false; + } + } + return false; + } + } + case MAP: { + MapObjectInspector moi = (MapObjectInspector) oi; + ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); + ObjectInspector mapValueObjectInspector = moi + .getMapValueObjectInspector(); + Map omap = moi.getMap(o); + if (omap == null) { + return true; + } else { + // there are no elements in the map + if (omap.entrySet().size() == 0) { + return false; + } + // if all the entries of map are representing null, then return true + for (Map.Entry entry : omap.entrySet()) { + if (!isNullObject(entry.getKey(), mapKeyObjectInspector) + || !isNullObject(entry.getValue(), mapValueObjectInspector)) { + return false; + } + } + return true; + } + } + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector) oi; + List structFields = soi.getAllStructFieldRefs(); + if (o == null) { + return true; + } else { + // there are no fields in the struct + if (structFields.size() == 0) { + return false; + } + // if all the fields of struct are representing null, then return true + for (int i = 0; i < structFields.size(); i++) { + if (!isNullObject(soi.getStructFieldData(o, structFields.get(i)), + structFields.get(i).getFieldObjectInspector())) { + return false; + } + } + return true; + } + } + default: + throw new RuntimeException("Unknown type in ObjectInspector!"); + } + } + private SerDeUtils() { // prevent instantiation }