diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 36dc95a..9b5845b 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -223,6 +223,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   auto_sortmerge_join_2.q,\
   auto_sortmerge_join_3.q,\
   auto_sortmerge_join_4.q,\
+  auto_sortmerge_join_5.q,\
   auto_sortmerge_join_7.q,\
   auto_sortmerge_join_8.q,\
   auto_sortmerge_join_9.q
diff --git pom.xml pom.xml
index 8a63eb1..8333a0d 100644
--- pom.xml
+++ pom.xml
@@ -151,7 +151,7 @@
     1.0.1
     1.7.5
     4.0.4
-    0.5.1
+    0.5.2-SNAPSHOT
     2.2.0
     1.1
     0.2
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
index 7487f7e..7a4e7ca 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
@@ -306,9 +306,7 @@ private void fetchNextGroup(Byte t) throws HiveException {
 
   public void closeOp(boolean abort) throws HiveException {
     joinFinalLeftData();
-    if (!((joinKeysObjectInspectors != null) && (joinKeysObjectInspectors[alias] != null))) {
-      super.closeOp(abort);
-    }
+    super.closeOp(abort);
 
     // clean up
     for (int pos = 0; pos < order.length; pos++) {
diff --git ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
index d238592..b42bac7 100644
--- ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
+++ ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
@@ -110,70 +110,41 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+                  alias: a
+                  Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      condition expressions:
-                        0 
-                        1 
-                      Estimated key counts: Map 3 => 1
-                      keys:
-                        0 key (type: string)
-                        1 key (type: string)
-                      input vertices:
-                        0 Map 3
-                      Position of Big Table: 1
-                      Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            tag: -1
-                            value expressions: _col0 (type: bigint)
-                            auto parallelism: false
+                    Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
 #### A masked pattern was here ####
                 Partition
-                  base file name: bucket_big
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
                     COLUMN_STATS_ACCURATE true
                     SORTBUCKETCOLSPREFIX TRUE
-                    bucket_count 2
+                    bucket_count 4
                     bucket_field_name key
                     columns key,value
                     columns.comments 
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.bucket_big
-                    numFiles 2
-                    serialization.ddl struct bucket_big { string key, string value}
+                    name default.bucket_small
+                    numFiles 4
+                    serialization.ddl struct bucket_small { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 2750
+                    totalSize 226
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -182,64 +153,79 @@ STAGE PLANS:
                   properties:
                     COLUMN_STATS_ACCURATE true
                     SORTBUCKETCOLSPREFIX TRUE
-                    bucket_count 2
+                    bucket_count 4
                     bucket_field_name key
                     columns key,value
                     columns.comments 
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.bucket_big
-                    numFiles 2
-                    serialization.ddl struct bucket_big { string key, string value}
+                    name default.bucket_small
+                    numFiles 4
+                    serialization.ddl struct bucket_small { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 2750
+                    totalSize 226
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_big
-                  name: default.bucket_big
+                    name: default.bucket_small
+                  name: default.bucket_small
             Truncated Path -> Alias:
-              /bucket_big [b]
-        Map 3 
+              /bucket_small [a]
            Map Operator Tree:
                TableScan
-                  alias: a
-                  Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                  alias: b
+                  Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
+                    Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
+                    Merge Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 1
+                      Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
 #### A masked pattern was here ####
                 Partition
-                  base file name: bucket_small
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
                     COLUMN_STATS_ACCURATE true
                     SORTBUCKETCOLSPREFIX TRUE
-                    bucket_count 4
+                    bucket_count 2
                     bucket_field_name key
                     columns key,value
                     columns.comments 
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    serialization.ddl struct bucket_small { string key, string value}
+                    name default.bucket_big
+                    numFiles 2
+                    serialization.ddl struct bucket_big { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
+                    totalSize 2750
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -248,24 +234,24 @@ STAGE PLANS:
                   properties:
                     COLUMN_STATS_ACCURATE true
                     SORTBUCKETCOLSPREFIX TRUE
-                    bucket_count 4
+                    bucket_count 2
                     bucket_field_name key
                     columns key,value
                     columns.comments 
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    serialization.ddl struct bucket_small { string key, string value}
+                    name default.bucket_big
+                    numFiles 2
+                    serialization.ddl struct bucket_big { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
+                    totalSize 2750
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
+                    name: default.bucket_big
+                  name: default.bucket_big
             Truncated Path -> Alias:
-              /bucket_small [a]
+              /bucket_big [b]
         Reducer 2 
             Needs Tagging: false
            Reduce Operator Tree:
@@ -361,11 +347,10 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 2 
            Map Operator Tree:
                TableScan
                  alias: b
@@ -375,19 +360,11 @@ STAGE PLANS:
                     isSamplingPred: false
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
 #### A masked pattern was here ####
                 Partition
-                  base file name: bucket_small
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
@@ -431,7 +408,6 @@ STAGE PLANS:
                   name: default.bucket_small
            Truncated Path -> Alias:
              /bucket_small [b]
-        Map 2 
            Map Operator Tree:
                TableScan
                  alias: a
@@ -441,18 +417,15 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
+                    Merge Join Operator
                      condition map:
                           Inner Join 0 to 1
                      condition expressions:
                        0 
                        1 
-                      Estimated key counts: Map 1 => 1
                      keys:
                        0 key (type: string)
                        1 key (type: string)
-                      input vertices:
-                        1 Map 1
                      Position of Big Table: 0
                      Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
                      Select Operator
@@ -473,7 +446,6 @@ STAGE PLANS:
             Path -> Partition:
 #### A masked pattern was here ####
                 Partition
-                  base file name: bucket_big
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties: