diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 36dc95a..9b5845b 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -223,6 +223,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
auto_sortmerge_join_2.q,\
auto_sortmerge_join_3.q,\
auto_sortmerge_join_4.q,\
+ auto_sortmerge_join_5.q,\
auto_sortmerge_join_7.q,\
auto_sortmerge_join_8.q,\
auto_sortmerge_join_9.q
diff --git pom.xml pom.xml
index 8a63eb1..8333a0d 100644
--- pom.xml
+++ pom.xml
@@ -151,7 +151,7 @@
     <stax.version>1.0.1</stax.version>
     <slf4j.version>1.7.5</slf4j.version>
     <ST4.version>4.0.4</ST4.version>
-    <tez.version>0.5.1</tez.version>
+    <tez.version>0.5.2-SNAPSHOT</tez.version>
     <super-csv.version>2.2.0</super-csv.version>
     <tempus-fugit.version>1.1</tempus-fugit.version>
     <snappy.version>0.2</snappy.version>
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
index 7487f7e..7a4e7ca 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
@@ -306,9 +306,7 @@ private void fetchNextGroup(Byte t) throws HiveException {
public void closeOp(boolean abort) throws HiveException {
joinFinalLeftData();
- if (!((joinKeysObjectInspectors != null) && (joinKeysObjectInspectors[alias] != null))) {
- super.closeOp(abort);
- }
+ super.closeOp(abort);
// clean up
for (int pos = 0; pos < order.length; pos++) {
diff --git ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
index d238592..b42bac7 100644
--- ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
+++ ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
@@ -110,70 +110,41 @@ STAGE PLANS:
Stage: Stage-1
Tez
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Estimated key counts: Map 3 => 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 0 Map 3
- Position of Big Table: 1
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: bucket_big
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
- serialization.ddl struct bucket_big { string key, string value}
+ name default.bucket_small
+ numFiles 4
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -182,64 +153,79 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
- serialization.ddl struct bucket_big { string key, string value}
+ name default.bucket_small
+ numFiles 4
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
Truncated Path -> Alias:
- /bucket_big [b]
- Map 3
+ /bucket_small [a]
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
+ Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 1
+ Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: bucket_small
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
+ name default.bucket_big
+ numFiles 2
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -248,24 +234,24 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
+ name default.bucket_big
+ numFiles 2
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small [a]
+ /bucket_big [b]
Reducer 2
Needs Tagging: false
Reduce Operator Tree:
@@ -361,11 +347,10 @@ STAGE PLANS:
Stage: Stage-1
Tez
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: b
@@ -375,19 +360,11 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: bucket_small
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -431,7 +408,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small [b]
- Map 2
Map Operator Tree:
TableScan
alias: a
@@ -441,18 +417,15 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
0
1
- Estimated key counts: Map 1 => 1
keys:
0 key (type: string)
1 key (type: string)
- input vertices:
- 1 Map 1
Position of Big Table: 0
Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -473,7 +446,6 @@ STAGE PLANS:
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: bucket_big
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties: