diff --git build-common.xml build-common.xml index a50b032..0807827 100644 --- build-common.xml +++ build-common.xml @@ -59,7 +59,7 @@ - + diff --git data/files/leftsemijoin_mr_t1.txt data/files/leftsemijoin_mr_t1.txt new file mode 100644 index 0000000..6ed281c --- /dev/null +++ data/files/leftsemijoin_mr_t1.txt @@ -0,0 +1,2 @@ +1 +1 diff --git data/files/leftsemijoin_mr_t2.txt data/files/leftsemijoin_mr_t2.txt new file mode 100644 index 0000000..179ef0e --- /dev/null +++ data/files/leftsemijoin_mr_t2.txt @@ -0,0 +1,20 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java index 490da1d..c594ba0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java @@ -93,7 +93,8 @@ public void processOp(Object row, int tag) throws HiveException { .toString()); List keyObject = (List) soi.getStructFieldData(row, sf); // Are we consuming too much memory - if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) { + if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) && + !hasLeftSemiJoin) { if (sz == joinEmitInterval) { // The input is sorted by alias, so if we are already in the last join // operand, diff --git ql/src/test/queries/clientpositive/leftsemijoin_mr.q ql/src/test/queries/clientpositive/leftsemijoin_mr.q new file mode 100644 index 0000000..5813ca3 --- /dev/null +++ ql/src/test/queries/clientpositive/leftsemijoin_mr.q @@ -0,0 +1,20 @@ +CREATE TABLE T1(key INT); +LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1; +CREATE TABLE T2(key INT); +LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2; + +-- Run this query using TestMinimrCliDriver + +SELECT * FROM T1; +SELECT * FROM T2; + +set hive.auto.convert.join=false; +set mapred.reduce.tasks=2; + +set hive.join.emit.interval=100; + +SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key); + +set hive.join.emit.interval=1; + +SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key); diff --git ql/src/test/results/clientpositive/leftsemijoin_mr.q.out ql/src/test/results/clientpositive/leftsemijoin_mr.q.out new file mode 100644 index 0000000..6d8a468 --- /dev/null +++ ql/src/test/results/clientpositive/leftsemijoin_mr.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: CREATE TABLE T1(key INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE T1(key INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE T2(key INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@t2 +PREHOOK: query: -- Run this query using TestMinimrCliDriver + +SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: -- Run this query using TestMinimrCliDriver + +SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 +1 +PREHOOK: query: SELECT * FROM T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 +PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1