Details
-
Bug
-
Status: Resolved
-
Critical
-
Resolution: Fixed
-
Impala 2.7.0
Description
I noticed that web_sales table is missing statistics and if web_sales is substituted with catalog_sales which has stats the planner generates a plan without issues.
Exception
I0905 18:10:47.672567 83600 HdfsScanNode.java:471] computeNumNodes totalRanges=1 localRanges=1 remoteRanges=0 localHostSet.size=3 clusterNodes=7 I0905 18:10:47.672780 83600 HdfsScanNode.java:418] computeStats HdfsScan: #nodes=1 I0905 18:10:47.673322 83600 JoinNode.java:478] stats Join: cardinality=0 I0905 18:10:47.673544 83600 SingleNodePlanner.java:480] 0 chose tpcds_1000_parquet.web_sales #lhs=7305 #rhs=0 #ops=7305 I0905 18:10:47.673832 83600 JoinNode.java:478] stats Join: cardinality=0 I0905 18:10:47.674172 83600 UnionNode.java:105] stats Union: cardinality=4319968415 I0905 18:10:47.674392 83600 UnionNode.java:105] stats Union: cardinality=4319968415 I0905 18:10:47.675468 83600 JoinNode.java:478] stats Join: cardinality=0 I0905 18:10:47.675691 83600 SingleNodePlanner.java:480] 0 chose a2 #lhs=0 #rhs=1 #ops=1 I0905 18:10:47.675930 83600 JoinNode.java:478] stats Join: cardinality=0 I0905 18:10:47.676204 83600 JoinNode.java:478] stats Join: cardinality=0 I0905 18:10:47.676479 83600 JoinNode.java:478] stats Join: cardinality=0 I0905 18:10:47.676802 83600 UnionNode.java:105] stats Union: cardinality=0 I0905 18:10:47.677029 83600 UnionNode.java:105] stats Union: cardinality=0 I0905 18:10:47.678318 83600 UnionNode.java:105] stats Union: cardinality=0 I0905 18:10:47.678930 83600 SortNode.java:135] stats Sort: cardinality=0 I0905 18:10:47.723592 83600 jni-util.cc:169] java.lang.IllegalArgumentException: Comparison method violates its general contract! at java.util.TimSort.mergeHi(TimSort.java:868) at java.util.TimSort.mergeAt(TimSort.java:485) at java.util.TimSort.mergeCollapse(TimSort.java:410) at java.util.TimSort.sort(TimSort.java:214) at java.util.TimSort.sort(TimSort.java:173) at java.util.Arrays.sort(Arrays.java:659) at java.util.Collections.sort(Collections.java:217) at com.cloudera.impala.planner.RuntimeFilterGenerator.generateRuntimeFilters(RuntimeFilterGenerator.java:412) at com.cloudera.impala.planner.Planner.createPlan(Planner.java:92) at com.cloudera.impala.service.Frontend.createExecRequest(Frontend.java:975) at com.cloudera.impala.service.JniFrontend.createExecRequest(JniFrontend.java:150) I0905 18:10:47.724532 83600 status.cc:114] IllegalArgumentException: Comparison method violates its general contract! @ 0x84f189 (unknown) @ 0xba3b30 (unknown) @ 0xab8e14 (unknown) @ 0xad5097 (unknown) @ 0xadaa48 (unknown) @ 0xb19ebc (unknown) @ 0xd4df65 (unknown) @ 0xd51264 (unknown) @ 0x81ca8c (unknown) @ 0x1b4f16b (unknown) @ 0x1b36d19 (unknown) @ 0xa0ebd9 (unknown) @ 0xa0f632 (unknown) @ 0xbf4ed9 (unknown) @ 0xbf5874 (unknown) @ 0xe5a68a (unknown) @ 0x3b38e07aa1 (unknown) @ 0x3b38ae893d (unknown) I0905 18:10:47.724845 83600 impala-server.cc:919] UnregisterQuery(): query_id=81465f300f7940e5:c4e189279d41939c I0905 18:10:47.725069 83600 impala-server.cc:1005] Cancel(): query_id=81465f300f7940e5:c4e189279d41939c
Query
explain with cross_items as ( select distinct i_item_sk ss_item_sk from item, ( select t1t2t3.* from ( select t1t2.* from ( select t1.* from ( select distinct iss.i_brand_id brand_id ,iss.i_class_id class_id ,iss.i_category_id category_id from store_sales ,item iss ,date_dim d1 where ss_item_sk = iss.i_item_sk and ss_sold_date_sk = d1.d_date_sk and d1.d_year between 1999 AND 1999 + 2 ) t1 INNER JOIN ( select distinct ics.i_brand_id brand_id ,ics.i_class_id class_id ,ics.i_category_id category_id from catalog_sales ,item ics ,date_dim d2 where cs_item_sk = ics.i_item_sk and cs_sold_date_sk = d2.d_date_sk and d2.d_year between 1999 AND 1999 + 2 ) t2 ON t1.brand_id = t2.brand_id and t1.class_id = t2.class_id and t1.category_id = t2.category_id ) t1t2 INNER JOIN ( select distinct iws.i_brand_id brand_id ,iws.i_class_id class_id ,iws.i_category_id category_id from web_sales ,item iws ,date_dim d3 where ws_item_sk = iws.i_item_sk and ws_sold_date_sk = d3.d_date_sk and d3.d_year between 1999 AND 1999 + 2 ) t3 ON t1t2.brand_id = t3.brand_id and t1t2.class_id = t3.class_id and t1t2.category_id = t3.category_id ) t1t2t3 ) t1t2t3_1 where i_brand_id = brand_id and i_class_id = class_id and i_category_id = category_id ), avg_sales as (select avg(quantity*list_price) average_sales from (select ss_quantity quantity ,ss_list_price list_price from store_sales ,date_dim where ss_sold_date_sk = d_date_sk and d_year between 1999 and 2001 union all select cs_quantity quantity ,cs_list_price list_price from catalog_sales ,date_dim where cs_sold_date_sk = d_date_sk and d_year between 1998 and 2000 union all select ws_quantity quantity ,ws_list_price list_price from web_sales ,date_dim where ws_sold_date_sk = d_date_sk and d_year between 1998 and 1998 + 2) x) , results AS (select channel, i_brand_id, i_class_id, i_category_id, sum(sales) sum_sales, sum(number_sales) number_sales from ( select * from ( select * from (select channel, i_brand_id,i_class_id,i_category_id, sales, number_sales from (select 'store' channel, i_brand_id,i_class_id ,i_category_id,sum(ss_quantity*ss_list_price) sales , count(*) number_sales from store_sales ,item ,date_dim where ss_item_sk in (select ss_item_sk from cross_items) and ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and d_year = 1998+2 and d_moy = 11 group by i_brand_id,i_class_id,i_category_id ) a1 cross join (select average_sales from avg_sales) a2 where a1.sales > a2.average_sales ) a12 ) a121 union all select * from (select * from (select channel, i_brand_id,i_class_id,i_category_id, sales, number_sales from ( select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales from catalog_sales ,item ,date_dim where cs_item_sk in (select ss_item_sk from cross_items) and cs_item_sk = i_item_sk and cs_sold_date_sk = d_date_sk and d_year = 1998+2 and d_moy = 11 group by i_brand_id,i_class_id,i_category_id ) a1 cross join (select average_sales from avg_sales) a2 where a1.sales > a2.average_sales ) a12 ) a121 --having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) union all select * from (select * from (select channel, i_brand_id,i_class_id,i_category_id, sales, number_sales from ( select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales from web_sales ,item ,date_dim where ws_item_sk in (select ss_item_sk from cross_items) and ws_item_sk = i_item_sk and ws_sold_date_sk = d_date_sk and ws_sold_date_sk between 2451850 and 2451879 and d_year = 1998+2 and d_moy = 11 group by i_brand_id,i_class_id,i_category_id ) a1 cross join (select average_sales from avg_sales) a2 where a1.sales > a2.average_sales ) a12 ) a121 --having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) ) y group by channel, i_brand_id,i_class_id,i_category_id) select channel, i_brand_id, i_class_id, i_category_id, sum_sales, number_sales from ( select channel, i_brand_id, i_class_id, i_category_id, sum_sales, number_sales from results union select channel, i_brand_id, i_class_id, null as i_category_id, sum(sum_sales), sum(number_sales) from results group by channel, i_brand_id, i_class_id union select channel, i_brand_id, null as i_class_id, null as i_category_id, sum(sum_sales), sum(number_sales) from results group by channel, i_brand_id union select channel, null as i_brand_id, null as i_class_id, null as i_category_id, sum(sum_sales), sum(number_sales) from results group by channel union select null as channel, null as i_brand_id, null as i_class_id, null as i_category_id, sum(sum_sales), sum(number_sales) from results) z order by channel, i_brand_id, i_class_id, i_category_id limit 100
web_sales stats
Column | Type | #Distinct Values | #Nulls | Max Size | Avg Size |
---|---|---|---|---|---|
ws_sold_time_sk | INT | 0 | -1 | 4 | 4 |
ws_ship_date_sk | INT | 0 | -1 | 4 | 4 |
ws_item_sk | BIGINT | 0 | -1 | 8 | 8 |
ws_bill_customer_sk | INT | 0 | -1 | 4 | 4 |
ws_bill_cdemo_sk | INT | 0 | -1 | 4 | 4 |
ws_bill_hdemo_sk | INT | 0 | -1 | 4 | 4 |
ws_bill_addr_sk | INT | 0 | -1 | 4 | 4 |
ws_ship_customer_sk | INT | 0 | -1 | 4 | 4 |
ws_ship_cdemo_sk | INT | 0 | -1 | 4 | 4 |
ws_ship_hdemo_sk | INT | 0 | -1 | 4 | 4 |
ws_ship_addr_sk | INT | 0 | -1 | 4 | 4 |
ws_web_page_sk | INT | 0 | -1 | 4 | 4 |
ws_web_site_sk | INT | 0 | -1 | 4 | 4 |
ws_ship_mode_sk | INT | 0 | -1 | 4 | 4 |
ws_warehouse_sk | INT | 0 | -1 | 4 | 4 |
ws_promo_sk | INT | 0 | -1 | 4 | 4 |
ws_order_number | BIGINT | 0 | -1 | 8 | 8 |
ws_quantity | INT | 0 | -1 | 4 | 4 |
ws_wholesale_cost | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_list_price | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_sales_price | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_ext_discount_amt | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_ext_sales_price | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_ext_wholesale_cost | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_ext_list_price | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_ext_tax | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_coupon_amt | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_ext_ship_cost | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_net_paid | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_net_paid_inc_tax | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_net_paid_inc_ship | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_net_paid_inc_ship_tax | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_net_profit | DECIMAL(7,2) | 0 | -1 | 4 | 4 |
ws_sold_date_sk | INT | 1824 | 1 | 4 | 4 |
With runtime filters disabled
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
| Explain String |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
| Estimated Per-Host Requirements: Memory=330.12MB VCores=4 |
| WARNING: The following tables are missing relevant table and/or column statistics. |
| tpcds_1000_parquet.web_sales