Description
1) prepare sample data:
a=1
while [[ $a -lt 100 ]]; do echo $a ; let a=$a+1; done > data
2) prepare source hive table:
CREATE TABLE `s`(`c` string);
load data local inpath 'data' into table s;
3) prepare the bucketed table:
set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;
CREATE TABLE `t`(`c` string) CLUSTERED BY (c) SORTED BY (c) INTO 5 BUCKETS;
insert into t select * from s;
4) reproduce this issue:
SET hive.auto.convert.sortmerge.join = true;
SET hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
SET hive.auto.convert.sortmerge.join.noconditionaltask = true;
SET hive.optimize.bucketmapjoin = true;
SET hive.optimize.bucketmapjoin.sortedmerge = true;
select * from t join t t1 on t.c=t1.c limit 1;