diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index ff292a3..690c718 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -25,7 +25,6 @@ import java.util.concurrent.Callable; import java.util.concurrent.Future; -import com.esotericsoftware.kryo.KryoException; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; @@ -63,12 +62,16 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.esotericsoftware.kryo.KryoException; + /** * Map side Join operator implementation. */ @@ -239,19 +242,30 @@ protected void completeInitializationOp(Object[] os) throws HiveException { if (valueIndex == null) { return super.getValueObjectInspectors(alias, aliasToObjectInspectors); } - unwrapContainer[alias] = new UnwrapRowContainer(alias, valueIndex, hasFilter(alias)); List inspectors = aliasToObjectInspectors[alias]; - int bigPos = conf.getPosBigTable(); + Converter[] converters = new Converter[valueIndex.length]; List valueOI = new ArrayList(); for (int i = 0; i < valueIndex.length; i++) { if (valueIndex[i] >= 0 && !joinKeysObjectInspectors[bigPos].isEmpty()) { - valueOI.add(joinKeysObjectInspectors[bigPos].get(valueIndex[i])); + if (conf.getNoOuterJoin()) { + valueOI.add(joinKeysObjectInspectors[bigPos].get(valueIndex[i])); + } else { + // It is an outer join. We are going to add the inspector from the + // inner side, but the key value will come from the outer side, so + // we need to create a converter from inputOI to outputOI. + valueOI.add(inspectors.get(i)); + converters[i] = ObjectInspectorConverters.getConverter( + joinKeysObjectInspectors[bigPos].get(valueIndex[i]), inspectors.get(i)); + } } else { valueOI.add(inspectors.get(i)); } } + + unwrapContainer[alias] = new UnwrapRowContainer(alias, valueIndex, converters, hasFilter(alias)); + return valueOI; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java index e7771e6..662e33d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java @@ -18,15 +18,16 @@ package org.apache.hadoop.hive.ql.exec.persistence; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.SerDeException; - import java.io.IOException; import java.io.ObjectOutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; + /** * Unwraps values from current key with valueIndex in mapjoin desc */ @@ -35,6 +36,7 @@ private final byte alias; private final int[] valueIndex; + private final Converter[] converters; private final boolean tagged; private final List unwrapped; @@ -43,9 +45,10 @@ private transient RowIterator> iterator; - public UnwrapRowContainer(byte alias, int[] valueIndex, boolean tagged) { + public UnwrapRowContainer(byte alias, int[] valueIndex, Converter[] converters, boolean tagged) { this.alias = alias; this.valueIndex = valueIndex; + this.converters = converters; this.tagged = tagged; this.unwrapped = new ArrayList(); } @@ -72,9 +75,16 @@ public MapJoinRowContainer setInternal(MapJoinRowContainer internal, Object[] cu return null; } unwrapped.clear(); - for (int index : valueIndex) { + for (int pos = 0; pos < valueIndex.length; pos++) { + int index = valueIndex[pos]; if (index >= 0) { - unwrapped.add(currentKey == null ? null : currentKey[index]); + if (currentKey == null) { + unwrapped.add(null); + } else if (converters[pos] != null) { + unwrapped.add(converters[pos].convert(currentKey[index])); + } else { + unwrapped.add(currentKey[index]); + } } else { unwrapped.add(values.get(-index - 1)); } diff --git ql/src/test/queries/clientpositive/mapjoin2.q ql/src/test/queries/clientpositive/mapjoin2.q new file mode 100644 index 0000000..fcc3bca --- /dev/null +++ ql/src/test/queries/clientpositive/mapjoin2.q @@ -0,0 +1,12 @@ +set hive.auto.convert.join=true; + +create table tbl (n bigint, t string); + +insert into tbl values (1, 'one'); +insert into tbl values(2, 'two'); + +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n; + +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n; + +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n; diff --git ql/src/test/results/clientpositive/mapjoin2.q.out ql/src/test/results/clientpositive/mapjoin2.q.out new file mode 100644 index 0000000..7bf3e6a --- /dev/null +++ ql/src/test/results/clientpositive/mapjoin2.q.out @@ -0,0 +1,56 @@ +PREHOOK: query: create table tbl (n bigint, t string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: create table tbl (n bigint, t string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: insert into tbl values (1, 'one') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@tbl +POSTHOOK: query: insert into tbl values (1, 'one') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into tbl values(2, 'two') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@tbl +POSTHOOK: query: insert into tbl values(2, 'two') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +1 one true true +PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +true true 2 two +PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +false false true true +true true false false