[HIVE-22227] Tez bucket pruning produces wrong result with shared work optimization - ASF JIRA

Log work

Agile Board

Rank to Top

Rank to Bottom

Bulk Copy Attachments

Bulk Move Attachments

Voters

Watch issue

Watchers

Create sub-task

Convert to sub-task

Move

Link

Clone

Labels

Update Comment Author

Replace String in Comment

Update Comment Visibility

Delete Comments

XML

Word

Printable

JSON

Details

Type: Bug
Status: Closed
Priority: Major
Resolution: Fixed
Affects Version/s: 4.0.0
Fix Version/s: 4.0.0-alpha-1
Component/s: Query Planning
Labels:
None

Target Version/s:

4.0.0

Description

Reproducer

set hive.tez.bucket.pruning=true;
set hive.optimize.shared.work=true;

CREATE TABLE srcbucket_mapjoin_n16(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
CREATE TABLE tab_part_n10 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORCFILE;
CREATE TABLE srcbucket_mapjoin_part_n17 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;

load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n16 partition(ds='2008-04-08');
load data local inpath '.$HIVE_SRC/data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n16 partition(ds='2008-04-08');

load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');
load data local inpath '$HIVE_SRC/data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');
load data local inpath '$HIVE_SRC/data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');


set hive.optimize.bucketingsorting=false;
insert overwrite table tab_part_n10 partition (ds='2008-04-08')
select key,value from srcbucket_mapjoin_part_n17;

CREATE TABLE tab_n9(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORCFILE;
insert overwrite table tab_n9 partition (ds='2008-04-08')
select key,value from srcbucket_mapjoin_n16;

select * from
(select * from tab_n9 where tab_n9.key = 0)a
join
(select * from tab_part_n10 where tab_part_n10.key = 98)b full outer join tab_part_n10 c on a.key = b.key and b.key = c.key
order by 1,2,3,4,5,6,7,8,9;