Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Fixed
-
4.0.0
-
None
Description
- Reproducer
--! qt:dataset:lineitem --! qt:dataset:part --! qt:dataset:src set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; --set hive.compute.query.using.stats=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.optimize.ppd=true; set hive.ppd.remove.duplicatefilters=true; set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.stats.autogather=true; set hive.tez.bigtable.minsize.semijoin.reduction=1; set hive.tez.min.bloom.filter.entries=1; set hive.stats.fetch.column.stats=true; set hive.tez.bloom.filter.factor=1.0f; set hive.auto.convert.join=false; set hive.optimize.shared.work=false; create database tpch_test; use tpch_test; CREATE TABLE `customer`( `c_custkey` bigint, `c_name` string, `c_address` string, `c_nationkey` bigint, `c_phone` string, `c_acctbal` double, `c_mktsegment` string, `c_comment` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' TBLPROPERTIES ( 'bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default', 'transient_lastDdlTime'='1543026723'); CREATE TABLE `lineitem`( `l_orderkey` bigint, `l_partkey` bigint, `l_suppkey` bigint, `l_linenumber` int, `l_quantity` double, `l_extendedprice` double, `l_discount` double, `l_tax` double, `l_returnflag` string, `l_linestatus` string, `l_shipdate` string, `l_commitdate` string, `l_receiptdate` string, `l_shipinstruct` string, `l_shipmode` string, `l_comment` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' TBLPROPERTIES ( 'bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default', 'transient_lastDdlTime'='1543027179'); CREATE TABLE `orders`( `o_orderkey` bigint, `o_custkey` bigint, `o_orderstatus` string, `o_totalprice` double, `o_orderdate` string, `o_orderpriority` string, `o_clerk` string, `o_shippriority` int, `o_comment` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' TBLPROPERTIES ( 'bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default', 'transient_lastDdlTime'='1543026824'); alter table customer update statistics set('numRows'='150000000','rawDataSize'='8633707142'); alter table lineitem update statistics set('numRows'='5999989709','rawDataSize'='184245066955'); alter table orders update statistics set('numRows'='1500000000','rawDataSize'='46741318253'); create view q18_tmp_cached as select l_orderkey, sum(l_quantity) as t_sum_quantity from lineitem where l_orderkey is not null group by l_orderkey; -- Set bloom filter size to huge number so we get any possible semijoin reductions set hive.tez.min.bloom.filter.entries=0; set hive.tez.max.bloom.filter.entries=1; create table q18_large_volume_customer_cached stored as orc tblproperties ('transactional'='true', 'transactional_properties'='default') as select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, q18_tmp_cached t, lineitem l where c_custkey = o_custkey and o_orderkey = t.l_orderkey and o_orderkey is not null and t.t_sum_quantity > 300 and o_orderkey = l.l_orderkey and l.l_orderkey is not null group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate limit 100; drop database tpch_test cascade;
To reproduce run the above as TestMiniLlapLocalCliDriver test